Automation init...3
This commit is contained in:
parent
7501b95498
commit
53b25f2733
@ -24,9 +24,11 @@ class InvenCrawler extends CrawlerController
|
||||
case 'video':
|
||||
$url = parent::getUrlByMediaType($node, $media_type, $attr);
|
||||
//그래도 null이면 data-src로 추출해본다.
|
||||
$attributes = $node->extract(['data-src']);
|
||||
if (count($attributes)) {
|
||||
$url = $attributes[0];
|
||||
if ($url === null) {
|
||||
$attributes = $node->extract(['data-src']);
|
||||
if (count($attributes)) {
|
||||
$url = $attributes[0];
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'img':
|
||||
@ -53,49 +55,10 @@ class InvenCrawler extends CrawlerController
|
||||
// </div>
|
||||
// <!-- ============== End CONTENT ============== -->
|
||||
// </div>
|
||||
protected function detail_content_process(int $cnt, array $listInfo): array
|
||||
protected function getDetailSelector(array $listInfo): array
|
||||
{
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
$selector = $this->getSelector($response, getenv("inven.view.content.tag"));
|
||||
$formDatas = [];
|
||||
$formDatas['image_path'] = "";
|
||||
$formDatas['content'] = $selector->html();
|
||||
//File DB 및 Board DB 등록작업등
|
||||
$this->getBoardModel()->createByCrawler(
|
||||
$this->getBoardsEntity(),
|
||||
$this->getUserEntity(),
|
||||
$cnt,
|
||||
$listInfo,
|
||||
[],
|
||||
$formDatas
|
||||
);
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
return $listInfo;
|
||||
}
|
||||
protected function detail_download_process(int $cnt, array $listInfo): array
|
||||
{
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
$selector = $this->getSelector($response, getenv("inven.view.content.tag"));
|
||||
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||
if ($this->isDebug) {
|
||||
throw new \Exception(sprintf(
|
||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||
__FUNCTION__,
|
||||
var_export($listInfo, true),
|
||||
var_export($media_urls, true)
|
||||
));
|
||||
} else {
|
||||
// Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
|
||||
$storages = $this->media_process($media_urls);
|
||||
if (!count($storages)) {
|
||||
throw new \Exception("등록할 자료가 없습니다.");
|
||||
}
|
||||
$this->backend_process($cnt, $listInfo, $storages);
|
||||
}
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
return $listInfo;
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
return array($this->getSelector($response, getenv("inven.view.content.tag")), $listInfo);
|
||||
}
|
||||
//리스트내용
|
||||
// <div class="board-list">
|
||||
@ -151,7 +114,6 @@ class InvenCrawler extends CrawlerController
|
||||
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
|
||||
$date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text();
|
||||
$nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();
|
||||
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
|
||||
$link_node = $node->filter(getenv("inven.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
|
||||
@ -1,17 +1,23 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler\Mangboard;
|
||||
namespace App\Controllers\Mangboard\Crawler;
|
||||
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use App\Libraries\MyCrawler\MangboardCrawler;
|
||||
use DateTime;
|
||||
use App\Controllers\Mangboard\CrawlerController;
|
||||
use CodeIgniter\HTTP\RequestInterface;
|
||||
use CodeIgniter\HTTP\ResponseInterface;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use DateTime;
|
||||
|
||||
class SirCrawler extends MangboardCrawler
|
||||
class SirCrawler extends CrawlerController
|
||||
{
|
||||
public function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||
public function initController(RequestInterface $request, ResponseInterface $response, LoggerInterface $logger)
|
||||
{
|
||||
parent::__construct($host, $board_name, $user_entity);
|
||||
parent::initController($request, $response, $logger);
|
||||
}
|
||||
// Returns the value of the "sir.host.url" environment setting (presumably the base URL of the crawl target - confirm against the .env file).
// NOTE(review): getenv() yields false when the variable is not set, which does not satisfy the declared string return type as-is.
final protected function getHost(): string
|
||||
{
|
||||
return getenv("sir.host.url");
|
||||
}
|
||||
protected function changeURLByCrawler(string $url): string
|
||||
{
|
||||
@ -68,8 +74,8 @@ class SirCrawler extends MangboardCrawler
|
||||
// <source src="//sir.kr/data/file/cm_humor/3535243533_CiH6Iv9O_ee170eeec15e748d9bfcc895836c71d9829c07fb.mp4" type="video/mp4" />
|
||||
// </video>
|
||||
// </div>
|
||||
// <p>..</p><div style=
|
||||
protected function detail_process(int $cnt, array $listInfo): array
|
||||
//
|
||||
protected function getDetailSelector(array $listInfo): array
|
||||
{
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
//작성시간
|
||||
@ -78,70 +84,25 @@ class SirCrawler extends MangboardCrawler
|
||||
$listInfo['date'] = trim($selector->text());
|
||||
$listInfo['date'] = DateTime::createFromFormat('Y.m.d H:i:s', $listInfo['date']);
|
||||
$listInfo['date'] = $listInfo['date']->format('Y-m-d H:i:s');
|
||||
// if ($this->isDebug) {
|
||||
// throw new \Exception(
|
||||
// sprintf(
|
||||
// "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n",
|
||||
// __FUNCTION__,
|
||||
// var_export($listInfo, true),
|
||||
// $selector->html()
|
||||
// )
|
||||
// );
|
||||
// }
|
||||
//작성내용
|
||||
$selector = $this->getSelector($response, getenv("sir.view.content.tag"));
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||
if ($this->isDebug) {
|
||||
throw new \Exception(sprintf(
|
||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||
__FUNCTION__,
|
||||
var_export($listInfo, true),
|
||||
var_export($media_urls, true)
|
||||
));
|
||||
} else {
|
||||
$storages = $this->media_process($media_urls);
|
||||
if (!count($storages)) {
|
||||
throw new \Exception("등록할 자료가 없습니다.");
|
||||
}
|
||||
$this->backend_process($cnt, $listInfo, $storages);
|
||||
}
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
return $listInfo;
|
||||
return array($this->getSelector($response, tag: getenv("sir.view.content.tag")), $listInfo);
|
||||
}
|
||||
//리스트내용
|
||||
// <li class="lbo_li li_bg0 lbo_like" style="z-index:30">
|
||||
// <div class="li_title" style="margin:0 270px 0 50px;">
|
||||
// <a href="//sir.kr/cm_humor/191449" class="title_link">할아버지의 마술 <i class="co-ico co-ico-small fa fa-folder-o"></i><i class="co-ico co-ico-small fa fa-play-circle"></i><span class="cnt_cmt">3</span></a>
|
||||
// <div class="li_num">21967</div>
|
||||
// <div class="li_info">
|
||||
// <span class="info_span info_nick">
|
||||
// <span class="sv_wrap">
|
||||
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" class="sv_member" title="감독님 자기소개" target="_blank" rel="nofollow" onclick="return false;"><span class="sir_mb_icon"></span> <span class="member">감독님</span></a>
|
||||
// <span class="sv">
|
||||
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
|
||||
// <a href="//sir.kr/cm_humor?sca=&sfl=mb_id,1&stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
|
||||
// <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
|
||||
// </span>
|
||||
// <noscript class="sv_nojs">
|
||||
// <span class="sv">
|
||||
// <a href="//sir.kr/bbs/profile.php?mb_id=hadirector" onclick="win_profile(this.href); return false;"><i class="fa fa-user" aria-hidden="true"></i> 자기소개</a>
|
||||
// <a href="//sir.kr/cm_humor?sca=&sfl=mb_id,1&stx=hadirector"><i class="fa fa-search" aria-hidden="true"></i> 아이디로 검색</a>
|
||||
// <a href="//sir.kr/main/member/?mb_id=hadirector"><i class="fa fa-file-text-o" aria-hidden="true"></i> 회원게시물</a>
|
||||
// </span>
|
||||
// </noscript>
|
||||
// </span>
|
||||
// </span>
|
||||
// <span class="info_span info_date"> 24.09.13</span>
|
||||
// <span class="info_span info_like">
|
||||
// <span class="like_good1">5</span>
|
||||
// </span>
|
||||
// <span class="info_span info_hit">244</span>
|
||||
// </div>
|
||||
// </li>
|
||||
public function execute(): void
|
||||
//리스트 내용
|
||||
// <td class="listvisited mobile-td subject-view">
|
||||
// <a href="board-read.asp?fullboardname=yamoonfreeboard&mtablename=humor&num=89372&ref=85575&page=1" class="ya-tooltip mobile-bold mobile-height" title="<p><br><br><video autoplay="autoplay" loop="loop" muted="" controls="controls" width="560"" height=" "> <source src=" https://files.bepick.net/bbs/2024/09/c2a20ab5771cbb934940551859fce1c8_769966583.mp4 "> </video><br><br><br></p">
|
||||
// 졸고 있는 여군</a>
|
||||
// <i class="fa fa-commenting-o" aria-hidden="true"></i> <span class="color-red small">6</span>
|
||||
// <span class="visible-xs visible-sm small"><i class="fa fa-user-o" aria-hidden="true"></i> yeeyuu | <i class="fa fa-thumbs-o-up" aria-hidden="true"></i> 6 | <i class="fa fa-eye" aria-hidden="true"></i> 369 | No 89372 | 2024-09-13</span>
|
||||
// </td>
|
||||
public function execute(string $board_name, string $user_id = null, ...$params): void
|
||||
{
|
||||
try {
|
||||
//추가옵션
|
||||
$this->isDebug = in_array('debug', $params);
|
||||
$this->isCopy = in_array('copy', $params);
|
||||
$this->setBoardName($board_name);
|
||||
$this->login_process($user_id);
|
||||
//실행
|
||||
$listInfos = [];
|
||||
if ($this->isDebug) {
|
||||
$listInfo = [];
|
||||
$listInfo['title'] = 'test_title';
|
||||
@ -149,12 +110,10 @@ class SirCrawler extends MangboardCrawler
|
||||
$listInfo['hit'] = 1;
|
||||
$listInfo['date'] = date("Y-m-d H:i:s");
|
||||
$listInfo['detail_url'] = getenv("sir.view.test.url.{$this->getBoardName()}");
|
||||
$this->detail_process(1, $listInfo);
|
||||
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
|
||||
$listInfos[] = $listInfo;
|
||||
} else {
|
||||
$listInfos = [];
|
||||
$response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->getBoardName()}"));
|
||||
$this->getSelector($response, getenv("sir.list.tag"))->each(
|
||||
$this->getSelector($response, getenv("sir.list.tag.{$this->getBoardName()}"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$link_node = $node->filter(getenv("sir.list.item.link.tag"));
|
||||
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함
|
||||
@ -167,11 +126,11 @@ class SirCrawler extends MangboardCrawler
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
}
|
||||
$this->list_process(intval(getenv("sir.list.max_limit")), $listInfos);
|
||||
}
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
}
|
||||
$this->list_process(intval(getenv("sir.list.max_limit.{$this->getBoardName()}")), $listInfos);
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
} catch (\Exception $e) {
|
||||
log_message("warning", sprintf(
|
||||
@ -18,48 +18,10 @@ class YamapCrawler extends CrawlerController
|
||||
{
|
||||
return getenv("yamap.host.url");
|
||||
}
|
||||
protected function detail_content_process(int $cnt, array $listInfo): array
|
||||
protected function getDetailSelector(array $listInfo): array
|
||||
{
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
$selector = $this->getSelector($response, getenv("yamap.view.content.tag"));
|
||||
$formDatas = [];
|
||||
$formDatas['image_path'] = "";
|
||||
$formDatas['content'] = $selector->html();
|
||||
//File DB 및 Board DB 등록작업등
|
||||
$this->getBoardModel()->createByCrawler(
|
||||
$this->getBoardsEntity(),
|
||||
$this->getUserEntity(),
|
||||
$cnt,
|
||||
$listInfo,
|
||||
[],
|
||||
$formDatas
|
||||
);
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
return $listInfo;
|
||||
}
|
||||
protected function detail_download_process(int $cnt, array $listInfo): array
|
||||
{
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
$selector = $this->getSelector($response, getenv("yamap.view.content.tag"));
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||
if ($this->isDebug) {
|
||||
throw new \Exception(sprintf(
|
||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||
__FUNCTION__,
|
||||
var_export($listInfo, true),
|
||||
var_export($media_urls, true)
|
||||
));
|
||||
} else {
|
||||
// Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
|
||||
$storages = $this->media_process($media_urls);
|
||||
if (!count($storages)) {
|
||||
throw new \Exception("등록할 자료가 없습니다.");
|
||||
}
|
||||
$this->backend_process($cnt, $listInfo, $storages);
|
||||
}
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
return $listInfo;
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
return array($this->getSelector($response, getenv("yamap.view.content.tag")), $listInfo);
|
||||
}
|
||||
//리스트내용
|
||||
// <div class="panel panel-default">
|
||||
@ -91,7 +53,8 @@ class YamapCrawler extends CrawlerController
|
||||
// </div>
|
||||
// <div id="freesubframe"></div>
|
||||
// </div>
|
||||
public function execute(string $board_name, string $user_id = null, ...$params): void
|
||||
|
||||
final public function execute(string $board_name, string $user_id = null, ...$params): void
|
||||
{
|
||||
try {
|
||||
//추가옵션
|
||||
@ -99,8 +62,6 @@ class YamapCrawler extends CrawlerController
|
||||
$this->isCopy = in_array('copy', $params);
|
||||
$this->setBoardName($board_name);
|
||||
$this->login_process($user_id);
|
||||
//실행
|
||||
$listInfos = [];
|
||||
if ($this->isDebug) {
|
||||
$listInfo = [];
|
||||
$listInfo['title'] = 'test_title';
|
||||
@ -111,17 +72,20 @@ class YamapCrawler extends CrawlerController
|
||||
$listInfos[] = $listInfo;
|
||||
} else {
|
||||
$response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->getBoardName()}"));
|
||||
$selector = $this->getSelector($response, getenv("inven.list.tag.{$this->getBoardName()}"));
|
||||
$selector->filter(getenv("yamap.list.item.tag"))->each(
|
||||
$selector = $this->getSelector($response, getenv("yamap.list.tag.{$this->getBoardName()}"));
|
||||
$selector->filter(getenv("yamap.list.item.tag.{$this->getBoardName()}"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
|
||||
$date = date("Y") . "-" . $node->filter(getenv("yamap.list.item.date.tag"))->text();
|
||||
$date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
|
||||
$nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
|
||||
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
|
||||
$link_node = $node->filter(getenv("yamap.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
|
||||
//bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
|
||||
if ($nickname != getenv("yamap.list.item.nickname.except")) {
|
||||
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
|
||||
$link_node = $node->filter(getenv("yamap.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->children()->last()->text();
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@ -1,16 +1,22 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler\Mangboard;
|
||||
namespace App\Controllers\Mangboard\Crawler;
|
||||
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use App\Libraries\MyCrawler\MangboardCrawler;
|
||||
use App\Controllers\Mangboard\CrawlerController;
|
||||
use CodeIgniter\HTTP\RequestInterface;
|
||||
use CodeIgniter\HTTP\ResponseInterface;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class YamoonCrawler extends MangboardCrawler
|
||||
class YamoonCrawler extends CrawlerController
|
||||
{
|
||||
public function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||
public function initController(RequestInterface $request, ResponseInterface $response, LoggerInterface $logger)
|
||||
{
|
||||
parent::__construct($host, $board_name, $user_entity);
|
||||
parent::initController($request, $response, $logger);
|
||||
}
|
||||
// Returns the value of the "yamoon.host.url" environment setting (presumably the base URL of the crawl target - confirm against the .env file).
// NOTE(review): getenv() yields false when the variable is not set, which does not satisfy the declared string return type as-is.
final protected function getHost(): string
|
||||
{
|
||||
return getenv("yamoon.host.url");
|
||||
}
|
||||
//작성내용
|
||||
// <div class="panel panel-default">
|
||||
@ -43,43 +49,10 @@ class YamoonCrawler extends MangboardCrawler
|
||||
// </div>
|
||||
// <div id="freesubframe"></div>
|
||||
// </div>
|
||||
protected function detail_process(int $cnt, array $listInfo): array
|
||||
protected function getDetailSelector(array $listInfo): array
|
||||
{
|
||||
$response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']);
|
||||
//작성시간
|
||||
// $selector = $this->getSelector($response, getenv("yamoon.view.date.tag"));
|
||||
// $listInfo['date'] = trim($selector->text());
|
||||
// if ($this->isDebug) {
|
||||
// throw new \Exception(
|
||||
// sprintf(
|
||||
// "\n--------------%s Debug--------------\n%s\n%s\n---------------------------------------\n",
|
||||
// __FUNCTION__,
|
||||
// var_export($listInfo, true),
|
||||
// $selector->html()
|
||||
// )
|
||||
// );
|
||||
// }
|
||||
//작성내용
|
||||
$selector = $this->getSelector($response, getenv("yamoon.view.content.tag"));
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||
if ($this->isDebug) {
|
||||
throw new \Exception(sprintf(
|
||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||
__FUNCTION__,
|
||||
var_export($listInfo, true),
|
||||
var_export($media_urls, true)
|
||||
));
|
||||
} else {
|
||||
// Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
|
||||
$storages = $this->media_process($media_urls);
|
||||
if (!count($storages)) {
|
||||
throw new \Exception("등록할 자료가 없습니다.");
|
||||
}
|
||||
$this->backend_process($cnt, $listInfo, $storages);
|
||||
}
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
return $listInfo;
|
||||
return array($this->getSelector($response, getenv("yamoon.view.content.tag")), $listInfo);
|
||||
}
|
||||
//리스트 내용
|
||||
// <td class="listvisited mobile-td subject-view">
|
||||
@ -87,10 +60,17 @@ class YamoonCrawler extends MangboardCrawler
|
||||
// 졸고 있는 여군</a>
|
||||
// <i class="fa fa-commenting-o" aria-hidden="true"></i> <span class="color-red small">6</span>
|
||||
// <span class="visible-xs visible-sm small"><i class="fa fa-user-o" aria-hidden="true"></i> yeeyuu | <i class="fa fa-thumbs-o-up" aria-hidden="true"></i> 6 | <i class="fa fa-eye" aria-hidden="true"></i> 369 | No 89372 | 2024-09-13</span>
|
||||
// </td>
|
||||
public function execute(): void
|
||||
// </td>
|
||||
public function execute(string $board_name, string $user_id = null, ...$params): void
|
||||
{
|
||||
try {
|
||||
//추가옵션
|
||||
$this->isDebug = in_array('debug', $params);
|
||||
$this->isCopy = in_array('copy', $params);
|
||||
$this->setBoardName($board_name);
|
||||
$this->login_process($user_id);
|
||||
//실행
|
||||
$listInfos = [];
|
||||
if ($this->isDebug) {
|
||||
$listInfo = [];
|
||||
$listInfo['title'] = 'test_title';
|
||||
@ -98,12 +78,10 @@ class YamoonCrawler extends MangboardCrawler
|
||||
$listInfo['hit'] = 1;
|
||||
$listInfo['date'] = date("Y-m-d H:i:s");
|
||||
$listInfo['detail_url'] = getenv("yamoon.view.test.url.{$this->getBoardName()}");
|
||||
$this->detail_process(1, $listInfo);
|
||||
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
|
||||
$listInfos[] = $listInfo;
|
||||
} else {
|
||||
$listInfos = [];
|
||||
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->getBoardName()}"));
|
||||
$this->getSelector($response, getenv("yamoon.list.tag"))->each(
|
||||
$this->getSelector($response, getenv("yamoon.list.tag.{$this->getBoardName()}"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
@ -113,12 +91,12 @@ class YamoonCrawler extends MangboardCrawler
|
||||
$listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
}
|
||||
$this->list_process(intval(getenv("yamoon.list.max_limit")), $listInfos);
|
||||
}
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
}
|
||||
$this->list_process(intval(getenv("yamoon.list.max_limit.{$this->getBoardName()}")), $listInfos);
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
} catch (\Exception $e) {
|
||||
log_message("warning", sprintf(
|
||||
"\n---%s 오류---\n%s\n-----------------------------------------\n",
|
||||
@ -29,9 +29,8 @@ abstract class CrawlerController extends CommonController
|
||||
{
|
||||
parent::initController($request, $response, $logger);
|
||||
}
|
||||
abstract protected function detail_content_process(int $cnt, array $listInfo): array;
|
||||
abstract protected function detail_download_process(int $cnt, array $listInfo): array;
|
||||
abstract protected function getHost(): string;
|
||||
abstract protected function getDetailSelector(array $listInfo): array;
|
||||
final protected function getBoardName(): string
|
||||
{
|
||||
return $this->_board_name;
|
||||
@ -78,14 +77,14 @@ abstract class CrawlerController extends CommonController
|
||||
}
|
||||
return $this->_board_model;
|
||||
}
|
||||
public function getUserModel(): UserModel
|
||||
final protected function getUserModel(): UserModel
|
||||
{
|
||||
if ($this->_user_model === null) {
|
||||
return $this->_user_model = new UserModel();
|
||||
}
|
||||
return $this->_user_model;
|
||||
}
|
||||
protected function login_process(string $user_id = null): void
|
||||
final protected function login_process(string $user_id = null): void
|
||||
{
|
||||
$user_id = $user_id ?? getenv("mangboard.login.default.id");
|
||||
$password = getenv("mangboard.login.default.password");
|
||||
@ -135,9 +134,8 @@ abstract class CrawlerController extends CommonController
|
||||
{
|
||||
switch ($media_tag) {
|
||||
case 'video':
|
||||
try {
|
||||
$url = $node->attr($attr); //<video src="test.mp4"></video> 또는 <video data-src="test.mp4"></video>
|
||||
} catch (\Exception) {
|
||||
$url = $node->attr($attr); //<video src="test.mp4"></video> 또는 <video data-src="test.mp4"></video>
|
||||
if ($url === null) {
|
||||
$url = $node->children()->attr("src"); //<video><source src="test.mp4"></source</video>
|
||||
}
|
||||
break;
|
||||
@ -148,7 +146,7 @@ abstract class CrawlerController extends CommonController
|
||||
}
|
||||
return $url;
|
||||
}
|
||||
protected function getUrlsByMediaType(Crawler $selector, string $media_tag, string $attr, array $urls = []): array
|
||||
private function getUrlsByMediaType(Crawler $selector, string $media_tag, string $attr, array $urls = []): array
|
||||
{
|
||||
log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_tag} 작업시작--------");
|
||||
$urls[$media_tag] = [];
|
||||
@ -182,7 +180,7 @@ abstract class CrawlerController extends CommonController
|
||||
$storage->setOriginSequence($file_sequence);
|
||||
return $storage->save();
|
||||
}
|
||||
//Yamap ViewPage의 이미지나영상데이터가 있으면 Dodownload 한다.
|
||||
//ViewPage의 이미지나영상데이터가 있으면 Dodownload 한다.
|
||||
private function media_download(string $media_tag, string $url): array
|
||||
{
|
||||
$file_names = explode('/', $url);
|
||||
@ -199,7 +197,7 @@ abstract class CrawlerController extends CommonController
|
||||
log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");
|
||||
return array($file_name, $content);
|
||||
}
|
||||
final protected function media_process(array $media_urls): array
|
||||
private function media_process(array $media_urls): array
|
||||
{
|
||||
$file_sequence = 1;
|
||||
$storages = []; //CreateBoard에서 사용을 위해 DetailPage마다 초기화
|
||||
@ -226,7 +224,7 @@ abstract class CrawlerController extends CommonController
|
||||
}
|
||||
return $storages;
|
||||
}
|
||||
protected function backend_process(int $cnt, array $listInfo, array $storages)
|
||||
private function backend_process(int $cnt, array $listInfo, array $storages)
|
||||
{
|
||||
//File DB 및 Board DB 등록작업등
|
||||
$board_entity = $this->getBoardModel()->createByCrawler(
|
||||
@ -252,6 +250,47 @@ abstract class CrawlerController extends CommonController
|
||||
}
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
}
|
||||
/**
 * Registers one crawled post using the raw HTML of its detail selector as the content.
 *
 * The detail selector and the (possibly updated) list info both come from
 * getDetailSelector(); no media is downloaded on this path.
 *
 * @param int   $cnt      Counter value forwarded unchanged to BoardModel::createByCrawler().
 * @param array $listInfo List-page data for the post; handed to getDetailSelector().
 *
 * @return array The list info as returned by getDetailSelector().
 */
private function detail_content_process(int $cnt, array $listInfo): array
{
    [$detailNode, $listInfo] = $this->getDetailSelector($listInfo);

    // Form data: an empty image path plus the selector's inner HTML as the post body.
    $formDatas = [
        'image_path' => "",
        'content'    => $detailNode->html(),
    ];

    // Board registration. The fifth argument is an empty array
    // (presumably the list of stored media files - confirm against BoardModel::createByCrawler()).
    $this->getBoardModel()->createByCrawler(
        $this->getBoardsEntity(),
        $this->getUserEntity(),
        $cnt,
        $listInfo,
        [],
        $formDatas
    );

    log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");

    return $listInfo;
}
|
||||
/**
 * Collects the img/video URLs of one post's detail selector, downloads them and registers the post.
 *
 * When $this->isDebug is set, nothing is downloaded: the list info and the collected
 * URLs are dumped into an exception instead.
 *
 * @param int   $cnt      Counter value forwarded unchanged to backend_process().
 * @param array $listInfo List-page data for the post; handed to getDetailSelector().
 *
 * @return array The list info as returned by getDetailSelector().
 *
 * @throws \Exception In debug mode (carrying the dump), or when media_process() yields no storages.
 */
private function detail_download_process(int $cnt, array $listInfo): array
{
    [$detailNode, $listInfo] = $this->getDetailSelector($listInfo);

    // Gather "img" sources first, then add "video" sources to the same URL map.
    $imageUrls  = $this->getUrlsByMediaType($detailNode, "img", "src");
    $media_urls = $this->getUrlsByMediaType($detailNode, "video", "src", $imageUrls);

    // Debug mode: report what would be processed and stop here.
    if ($this->isDebug) {
        $debugDump = sprintf(
            "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
            __FUNCTION__,
            var_export($listInfo, true),
            var_export($media_urls, true)
        );
        throw new \Exception($debugDump);
    }

    // Take the image/video source URLs and perform the actual download.
    $storages = $this->media_process($media_urls);
    if (count($storages) === 0) {
        throw new \Exception("등록할 자료가 없습니다.");
    }

    $this->backend_process($cnt, $listInfo, $storages);

    log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");

    return $listInfo;
}
|
||||
protected function list_process(int $max_limit, array $listInfos): void
|
||||
{
|
||||
//Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다.
|
||||
|
||||
@ -1,9 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler\Mangboard;
|
||||
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use App\Libraries\MyCrawler\MangboardCrawler;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
// Empty subclass: InvenCrawler declares no members of its own on top of MangboardCrawler.
class InvenCrawler extends MangboardCrawler {}
|
||||
@ -1,115 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler\Mangboard;
|
||||
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use App\Libraries\MyCrawler\MangboardCrawler;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class YamapCrawler extends MangboardCrawler
|
||||
{
|
||||
// Constructor that only forwards $host, $board_name and $user_entity to the parent constructor.
public function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct($host, $board_name, $user_entity);
|
||||
}
|
||||
/**
 * Fetches one post's detail page, collects its img/video URLs, downloads them and registers the post.
 *
 * The page is read from $listInfo['detail_url'] and narrowed with the selector
 * configured under "yamap.view.content.tag". When $this->isDebug is set, nothing is
 * downloaded: the list info and the collected URLs are dumped into an exception instead.
 *
 * @param int   $cnt      Counter value forwarded unchanged to backend_process().
 * @param array $listInfo List-page data for the post; must contain 'detail_url'.
 *
 * @return array The list info, unchanged.
 *
 * @throws \Exception In debug mode (carrying the dump), or when media_process() yields no storages.
 */
protected function detail_process(int $cnt, array $listInfo): array
{
    $pageBody   = $this->getMySocket()->getContent($listInfo['detail_url']);
    $contentTag = getenv("yamap.view.content.tag");
    $detailNode = $this->getSelector($pageBody, $contentTag);

    // Gather "img" sources first, then add "video" sources to the same URL map.
    $imageUrls  = $this->getUrlsByMediaType($detailNode, "img", "src");
    $media_urls = $this->getUrlsByMediaType($detailNode, "video", "src", $imageUrls);

    // Debug mode: report what would be processed and stop here.
    if ($this->isDebug) {
        $debugDump = sprintf(
            "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
            __FUNCTION__,
            var_export($listInfo, true),
            var_export($media_urls, true)
        );
        throw new \Exception($debugDump);
    }

    // Take the image/video source URLs and perform the actual download.
    $storages = $this->media_process($media_urls);
    if (count($storages) === 0) {
        throw new \Exception("등록할 자료가 없습니다.");
    }

    $this->backend_process($cnt, $listInfo, $storages);

    log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");

    return $listInfo;
}
|
||||
//리스트내용
|
||||
// <div class="panel panel-default">
|
||||
// <div class="text-center panel-heading-local-title text-bold">요즘 패션</div>
|
||||
// <div style="margin:5px 10px;">
|
||||
// <span class="pull-left dropdown">
|
||||
// 괴강고귀
|
||||
// </span>
|
||||
// <span class="pull-right">
|
||||
// | 추천 (14) | 조회 (432)
|
||||
// </span>
|
||||
// <div class="clearfix"></div>
|
||||
// <hr class="hr-xs-xs">
|
||||
// <span>
|
||||
// <a href="javascript:void(0);" id="incfont"><i class="fa fa-plus fa-fw" aria-hidden="true"></i></a><a href="javascript:void(0);" id="decfont"><i class="fa fa-minus fa-fw margin-left-5" aria-hidden="true"></i></a>
|
||||
// </span>
|
||||
// <span class="pull-right">2024-09-14 01:53:45
|
||||
// </span>
|
||||
// <div class="clearfix"></div>
|
||||
// <hr class="margin-top-5 margin-bottom-20">
|
||||
// <div class="fr-view margin-bottom-30" id="read-content" style="word-break:break-all;">
|
||||
// <p><img title="" class="cloudzoom" data-cloudzoom="zoomImage:'/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg'" class="fr-fic fr-dii" src="/newboard/yamoonfreeboard/uploads/humor/mceu_86177012011726246415487.jpg" alt=""></p>
|
||||
// <p> </p>
|
||||
// </div>
|
||||
// </div>
|
||||
// <div class="margin-10">
|
||||
// <a href="javascript:void(0)" onclick="javascript:window.open('https://twitter.com/intent/tweet?text='+encodeURIComponent(document.title)+'%20-%20'+encodeURIComponent(document.URL), 'twittersharedialog', 'menubar=no,toolbar=no,resizable=yes,scrollbars=yes,height=300,width=600');return false;" target="_blank"> <i class="fa fa-twitter-square fa-lg ya-tooltip" title="트위터 공유하기"></i></a>
|
||||
// <a href="javascript:void(0)" onclick="javascript:window.open('https://www.facebook.com/sharer/sharer.php?u='+encodeURIComponent(document.URL)+'&t='+encodeURIComponent(document.title), 'facebooksharedialog', 'menubar=no,toolbar=no,resizable=yes,scrollbars=yes,height=300,width=600');return false;" target="_blank"> <i class="fa fa-facebook-square fa-lg ya-tooltip" title="페이스북 공유하기"></i></a>
|
||||
// </div>
|
||||
// <div id="freesubframe"></div>
|
||||
// </div>
|
||||
/**
 * Runs one crawl pass for the current board.
 *
 * Normal mode: loads the list page configured under "yamap.list.url.<board>",
 * collects every list item whose nickname differs from "yamap.list.item.nickname.except",
 * and hands the collected items to list_process() together with "yamap.list.max_limit".
 * Debug mode ($this->isDebug): processes a single synthetic item pointing at
 * "yamap.view.test.url.<board>".
 *
 * Any \Exception raised along the way is caught here and written to the log as a warning.
 */
public function execute(): void
{
    try {
        if (!$this->isDebug) {
            $listInfos = [];
            $listUrl   = getenv("yamap.list.url.{$this->getBoardName()}");
            $response  = $this->getMySocket()->getContent($listUrl);
            $listNode  = $this->getSelector($response, getenv("yamap.list.tag"));

            // Visit every node matching the configured list-item selector.
            $items = $listNode->filter(getenv("yamap.list.item.tag"));
            $items->each(
                function (Crawler $item) use (&$listInfos): void {
                    $hit      = $item->filter(getenv("yamap.list.item.hit.tag"))->text();
                    $date     = $item->filter(getenv("yamap.list.item.date.tag"))->text();
                    $nickname = $item->filter(getenv("yamap.list.item.nickname.tag"))->text();

                    // Skip posts written by the excluded nickname (the administrator, per the original notes).
                    if ($nickname == getenv("yamap.list.item.nickname.except")) {
                        return;
                    }

                    // Otherwise take the link node: its href is the detail URL,
                    // and the text of its last child is the title.
                    $anchor     = $item->filter(getenv("yamap.list.item.link.tag"));
                    $detail_url = $anchor->attr("href");
                    $title      = $anchor->children()->last()->text();

                    $listInfos[] = [
                        'title'      => $title,
                        'nickname'   => $nickname,
                        'detail_url' => $detail_url,
                        'date'       => $date,
                        'hit'        => $hit,
                    ];
                }
            );

            if (count($listInfos) === 0) {
                throw new \Exception("Target URL이 없습니다.");
            }

            $this->list_process((int) getenv("yamap.list.max_limit"), $listInfos);
        } else {
            // Debug run: one hand-built list entry aimed at the configured test URL.
            $listInfo = [
                'title'      => 'test_title',
                'nickname'   => 'test_name',
                'hit'        => 1,
                'date'       => date("Y-m-d H:i:s"),
                'detail_url' => getenv("yamap.view.test.url.{$this->getBoardName()}"),
            ];
            $this->detail_process(1, $listInfo);
            log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
        }

        log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
    } catch (\Exception $e) {
        $report = sprintf(
            "\n---%s 오류---\n%s\n-----------------------------------------\n",
            __FUNCTION__,
            $e->getMessage()
        );
        log_message("warning", $report);
    }
}
|
||||
}
|
||||
@ -1,135 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler;
|
||||
|
||||
use App\Libraries\CommonLibrary;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use App\Traits\FileTrait;
|
||||
|
||||
/**
 * Base class for site crawlers: wraps Symfony DomCrawler to locate content,
 * extract media URLs, download the media through the subclass-provided socket
 * and persist it through the subclass-provided storage.
 */
abstract class MyCrawler extends CommonLibrary
{
    use FileTrait;

    /**
     * Only forwards to the parent constructor.
     */
    protected function __construct()
    {
        parent::__construct();
    }

    /**
     * Returns the client used to fetch remote content.
     * This class calls getContent() and getHost() on the returned object.
     */
    abstract protected function getMySocket();

    /**
     * Returns a fresh storage object for one downloaded file.
     * This class calls setOriginName/Content/Type/Sequence() and save() on it.
     */
    abstract protected function createMyStorage();

    /**
     * Parses $content and returns the nodes matching the CSS selector $tag.
     *
     * In debug mode the selector and the HTML of the first match are logged.
     * The selection is computed once and reused; when nothing matched, the
     * debug log reports an empty body instead of throwing, so enabling debug
     * no longer changes the control flow of the caller.
     *
     * @param string $content Raw HTML document.
     * @param string $tag     CSS selector to apply.
     * @return Crawler The (possibly empty) filtered node list.
     */
    final protected function getSelector(string $content, string $tag): Crawler
    {
        $crawler = new Crawler($content);
        if ($this->isDebug) {
            log_message("debug", __FUNCTION__ . "=> " . $tag);
        }
        $selector = $crawler->filter($tag);
        if ($this->isDebug) {
            log_message("debug", sprintf(
                "\n------------%s HTML-------------\n%s\n-----------------------------------------------------\n",
                __FUNCTION__,
                // html() throws on an empty node list, so guard it.
                $selector->count() > 0 ? $selector->html() : ""
            ));
        }
        return $selector;
    }

    /**
     * Strips the query string from a URL (everything from the first "?").
     *
     * @param string $url URL as found in the page.
     * @return string The part before the first "?"; an empty string when
     *                nothing precedes it (empty input or input starting with
     *                "?"), instead of violating the declared return type.
     */
    protected function changeURLByCrawler(string $url): string
    {
        return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : '';
    }

    /**
     * Reads the media URL from a single node.
     *
     * For "video" the attribute is read from the element itself
     * (<video src="test.mp4"> or <video data-src="test.mp4">) and, when it is
     * absent, from the first nested <source src="test.mp4"> element.
     * Crawler::attr() returns null for a missing attribute rather than
     * throwing, so the fallback is driven by the null result.
     *
     * @param Crawler $node       Node to inspect.
     * @param string  $media_type "video", "img" or any other tag name.
     * @param string  $attr       Attribute holding the URL.
     * @return null|string The URL, or null when none was found.
     */
    protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
    {
        switch ($media_type) {
            case 'video':
                try {
                    $url = $node->attr($attr);
                } catch (\Exception) {
                    // Raised when the node list is empty; fall through to the <source> lookup.
                    $url = null;
                }
                if ($url === null) {
                    $sources = $node->filter("source");
                    $url = $sources->count() > 0 ? $sources->attr("src") : null;
                }
                break;
            case 'img':
            default:
                $url = $node->attr($attr);
                break;
        }
        return $url;
    }

    /**
     * Collects the URLs of every $media_type element below $selector.
     *
     * @param Crawler $selector   Root to search in.
     * @param string  $media_type Tag name to look for; also the key used in the result.
     * @param string  $attr       Attribute holding the URL.
     * @param array   $urls       Previously collected URLs keyed by media type;
     *                            the entry for $media_type is reset.
     * @return array $urls with $urls[$media_type] set to the list of URLs found
     *               (query strings removed).
     */
    protected function getUrlsByMediaType(Crawler $selector, string $media_type, string $attr, array $urls = []): array
    {
        log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_type} 작업시작--------");
        $urls[$media_type] = [];
        $selector->filter($media_type)->each(
            // $media_type and $attr are captured by value: the attribute dump below
            // must not overwrite the attribute name used for the following nodes.
            function (Crawler $node) use ($media_type, $attr, &$urls): void {
                $url = $this->getUrlByMediaType($node, $media_type, $attr);
                if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) {
                    $urls[$media_type][] = $this->changeURLByCrawler($matches[0]);
                    return;
                }
                log_message("debug", __FUNCTION__ . "-> {$media_type}:{$attr}\n");
                // No usable URL: dump every attribute of the underlying DOM node for diagnosis.
                $domNode = $node->getNode(0);
                if ($domNode !== null && $domNode->hasAttributes()) {
                    foreach ($domNode->attributes as $domAttr) {
                        log_message("debug", "{$domAttr->nodeName} = {$domAttr->nodeValue}");
                    }
                }
            }
        );
        log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_type} 작업완료--------");
        return $urls;
    }

    /**
     * Hands one downloaded file to a new storage object and saves it.
     *
     * @param int    $file_sequence Position of the file within the current post.
     * @param string $media_type    Media type the file was collected under.
     * @param string $file_name     Original file name.
     * @param string $content       Downloaded file body.
     * @return mixed Whatever the storage's save() returns; media_process()
     *               calls getOriginName() on it.
     */
    private function media_save(int $file_sequence, string $media_type, string $file_name, string $content): mixed
    {
        log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");
        $storage = $this->createMyStorage();
        $storage->setOriginName($file_name);
        $storage->setOriginContent($content);
        $storage->setOriginType($media_type);
        $storage->setOriginSequence($file_sequence);
        return $storage->save();
    }

    /**
     * Downloads the image or video file a view page refers to.
     *
     * @param string $media_type Expected media type, checked against the file extension.
     * @param string $url        URL of the file.
     * @return array Two-element list: [file name, downloaded content].
     * @throws \Exception When the URL has no file name (for example it ends
     *                    with "/") or the extension does not fit $media_type.
     */
    private function media_download(string $media_type, string $url): array
    {
        $segments = explode('/', $url);
        $file_name = (string) array_pop($segments);
        // explode() never yields an empty array, so the meaningful check is on the last segment.
        if ($file_name === '') {
            throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
        }
        $name_parts = explode(".", $file_name);
        $file_ext = array_pop($name_parts);
        // isFileType_FileTrait() is not defined in this class; presumably supplied by FileTrait.
        if (!$this->isFileType_FileTrait($file_ext, $media_type)) {
            throw new \Exception("파일명 형식이 {$media_type}가 아닙니다");
        }
        $content = $this->getMySocket()->getContent($url);
        log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");
        return [$file_name, $content];
    }

    /**
     * Downloads and stores every URL in $media_urls.
     *
     * A failure on one file is logged as a warning and does not stop the
     * remaining files. The sequence number keeps counting across media types,
     * while the logged total is the count for the current media type only.
     *
     * @param array $media_urls Map of media type => list of URLs, as built by getUrlsByMediaType().
     * @return array Storage objects of the files that were saved successfully.
     */
    final protected function media_process(array $media_urls): array
    {
        $file_sequence = 1;
        $storages = []; // Fresh list per call; presumably consumed when the board entry is created.
        $last_url = ""; // Keeps the final log line valid when there was nothing to process.
        foreach ($media_urls as $media_type => $urls) {
            $total = count($urls);
            foreach ($urls as $url) {
                $last_url = $url;
                log_message("notice", __FUNCTION__ . " {$file_sequence}번째/총:{$total} MediaType->{$media_type} 작업 시작");
                try {
                    [$file_name, $content] = $this->media_download($media_type, $url);
                    $storage = $this->media_save($file_sequence, $media_type, $file_name, $content);
                    log_message("debug", __FUNCTION__ . " {$file_sequence}번째/총:{$total} 결과=>" . $storage->getOriginName());
                    $storages[] = $storage;
                } catch (\Exception $e) {
                    log_message("warning", sprintf(
                        "\n---%s MediaType->%s {$file_sequence}번째/총:{$total} 오류---\n%s\n-----------------------------------------\n",
                        __FUNCTION__,
                        $media_type,
                        $e->getMessage()
                    ));
                }
                log_message("notice", __FUNCTION__ . " {$file_sequence}번째/총:{$total} MediaType->{$media_type} 작업 완료");
                $file_sequence++;
            }
        }
        log_message("notice", __FUNCTION__ . "=> 게시물 {$last_url} 작업종료");
        return $storages;
    }
}
|
||||
Loading…
Reference in New Issue
Block a user