Automation init...3
This commit is contained in:
parent
2734cfa694
commit
6a4428755e
@ -31,17 +31,13 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
|
||||
$routes->cli('check_level/(:alpha)', 'UserController::check_level/$1');
|
||||
});
|
||||
$routes->group('crawler', function ($routes) {
|
||||
$routes->cli('yamap', 'CrawlerController::yamap');
|
||||
$routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1');
|
||||
$routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2');
|
||||
$routes->cli('yamoon', 'CrawlerController::yamoon');
|
||||
$routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1');
|
||||
$routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2');
|
||||
$routes->cli('sir', 'CrawlerController::sir');
|
||||
$routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1');
|
||||
$routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2');
|
||||
$routes->cli('inven', 'CrawlerController::inven');
|
||||
$routes->cli('inven/(:alpha)', 'CrawlerController::inven/$1');
|
||||
$routes->cli('inven/(:alpha)/(:any)', 'CrawlerController::inven/$1/$2');
|
||||
$routes->cli('yamap/(:any)', 'CrawlerController::yamap/$1');
|
||||
$routes->cli('yamap/(:any)/(:any)', 'CrawlerController::yamap/$1/$2');
|
||||
$routes->cli('yamoon/(:any)', 'CrawlerController::yamoon/$1');
|
||||
$routes->cli('yamoon/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2');
|
||||
$routes->cli('sir/(:any)', 'CrawlerController::sir/$1');
|
||||
$routes->cli('sir/(:any)/(:any)', 'CrawlerController::sir/$1/$2');
|
||||
$routes->cli('inven/(:any)', 'CrawlerController::inven/$1');
|
||||
$routes->cli('inven/(:any)/(:any)', 'CrawlerController::inven/$1/$2');
|
||||
});
|
||||
});
|
||||
|
||||
@ -47,16 +47,14 @@ class CrawlerController extends CommonController
|
||||
log_message("notice", "{$id}로 로그인 성공");
|
||||
return $user_entity;
|
||||
}
|
||||
public function yamap(string $id = "", string $option = ""): string
|
||||
public function yamap(string $board_name, ...$params): string
|
||||
{
|
||||
try {
|
||||
//1. 사이트 로그인 처리
|
||||
$user_entity = $this->login($id);
|
||||
$user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
|
||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||
$crawler = new YamapCrawler(getenv('yamap.host.url'), getenv("yamap.host.board_name"), $user_entity);
|
||||
if ($option) {
|
||||
$crawler->setDebug($option === "debug" ? true : false);
|
||||
}
|
||||
$crawler = new YamapCrawler(getenv('yamap.host.url'), $board_name, $user_entity);
|
||||
$crawler->setDebug(in_array('debug', $params));
|
||||
$crawler->execute(intval(getenv("yamap.list.max_limit")));
|
||||
return "완료되었습니다.";
|
||||
} catch (\Exception $e) {
|
||||
@ -64,33 +62,29 @@ class CrawlerController extends CommonController
|
||||
return $e->getMessage();
|
||||
}
|
||||
}
|
||||
/**
 * CLI entry point: logs in, builds a YamoonCrawler for the given board and runs it.
 *
 * @param string $board_name Board identifier passed to the YamoonCrawler constructor.
 * @param mixed  ...$params  Extra arguments. The value "debug" switches the crawler into
 *                           debug mode; an entry stored under the key "id" (if any) is used
 *                           as the login id, otherwise an empty id is passed to login().
 * @return string The completion message, or the exception message when the crawl fails.
 */
public function yamoon(string $board_name, ...$params): string
{
    try {
        // 1. Site login.
        // The earlier in_array('id', $params) check searched for the *value* "id" and then
        // read the *key* 'id', which positional arguments never define. Look the key up
        // directly and fall back to an empty id.
        $login_id = (string) ($params['id'] ?? "");
        $user_entity = $this->login($login_id);

        // 2. Hand the logged-in user to the crawler and run it.
        $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $board_name, $user_entity);
        $crawler->setDebug(in_array('debug', $params, true));
        $crawler->execute(intval(getenv("yamoon.list.max_limit")));

        return "완료되었습니다.";
    } catch (\Exception $e) {
        log_message("error", $e->getMessage());
        return $e->getMessage();
    }
}
|
||||
public function sir(string $id = "", string $option = ""): string
|
||||
public function sir(string $board_name, ...$params): string
|
||||
{
|
||||
try {
|
||||
//1. 사이트 로그인 처리
|
||||
$user_entity = $this->login($id);
|
||||
$user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
|
||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||
$crawler = new SirCrawler(getenv("sir.host.url"), getenv("sir.host.board_name"), $user_entity);
|
||||
if ($option) {
|
||||
$crawler->setDebug($option === "debug" ? true : false);
|
||||
}
|
||||
$crawler = new SirCrawler(getenv("sir.host.url"), $board_name, $user_entity);
|
||||
$crawler->setDebug(in_array('debug', $params));
|
||||
$crawler->execute(intval(getenv("sir.list.max_limit")));
|
||||
return "완료되었습니다.";
|
||||
} catch (\Exception $e) {
|
||||
@ -98,16 +92,14 @@ class CrawlerController extends CommonController
|
||||
return $e->getMessage();
|
||||
}
|
||||
}
|
||||
public function inven(string $id = "", string $option = ""): string
|
||||
public function inven(string $board_name, ...$params): string
|
||||
{
|
||||
try {
|
||||
//1. 사이트 로그인 처리
|
||||
$user_entity = $this->login($id);
|
||||
$user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
|
||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||
$crawler = new InvenCrawler(getenv("inven.host.url"), getenv("inven.host.board_name"), $user_entity);
|
||||
if ($option) {
|
||||
$crawler->setDebug($option === "debug" ? true : false);
|
||||
}
|
||||
$crawler = new InvenCrawler(getenv("inven.host.url"), $board_name, $user_entity);
|
||||
$crawler->setDebug(in_array('debug', $params));
|
||||
$crawler->execute(intval(getenv("inven.list.max_limit")));
|
||||
return "완료되었습니다.";
|
||||
} catch (\Exception $e) {
|
||||
|
||||
@ -11,6 +11,24 @@ class InvenCrawler extends MangboardCrawler
|
||||
{
|
||||
parent::__construct($host, $board_name, $user_entity);
|
||||
}
|
||||
/**
 * Resolves the media URL of a single node, with an extra "data-src" lookup for videos.
 *
 * For every media type the parent implementation is consulted first. For 'video' nodes a
 * non-empty "data-src" attribute then replaces that result.
 *
 * @param Crawler $node       Node to inspect.
 * @param string  $media_type Media tag name ('video', 'img', ...).
 * @param string  $attr       Attribute name forwarded to the parent implementation.
 * @return null|string The resolved URL, or whatever the parent returned when no usable
 *                     "data-src" value exists.
 */
protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
{
    $url = parent::getUrlByMediaType($node, $media_type, $attr);
    if ($media_type !== 'video') {
        return $url;
    }

    // extract() yields one entry per node even when the attribute is missing; that entry is
    // then empty. Overwriting unconditionally (as before) discarded a valid URL found by the
    // parent, so only a non-empty data-src value is allowed to replace it.
    $attributes = $node->extract(['data-src']);
    if (count($attributes) && $attributes[0] !== null && $attributes[0] !== '') {
        $url = $attributes[0];
    }

    return $url;
}
|
||||
//작성내용
|
||||
// <div class="articleContent">
|
||||
// <div id="imageCollectDiv" class="contentBody">
|
||||
@ -33,8 +51,8 @@ class InvenCrawler extends MangboardCrawler
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
$tag = getenv("inven.view.content.tag");
|
||||
$selector = $this->getSelector($response, $tag);
|
||||
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
||||
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||
if ($this->getDebug()) {
|
||||
throw new \Exception(sprintf(
|
||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||
@ -84,28 +102,31 @@ class InvenCrawler extends MangboardCrawler
|
||||
public function execute(int $max_limit): void
|
||||
{
|
||||
try {
|
||||
$listInfos = [];
|
||||
if ($this->getDebug()) {
|
||||
$this->detail_page(1, ['detail_url' => getenv("inven.view.test.url")]);
|
||||
}
|
||||
$response = $this->getMySocket()->getContent(getenv("inven.list.url"));
|
||||
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
|
||||
$this->getSelector($response, getenv("inven.list.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
|
||||
$date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text();
|
||||
$nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();
|
||||
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
|
||||
$link_node = $node->filter(getenv("inven.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
|
||||
$url = getenv("inven.view.test.url.{$this->_board_name}");
|
||||
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
|
||||
$this->detail_page(1, ['detail_url' => $url]);
|
||||
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
|
||||
} else {
|
||||
$listInfos = [];
|
||||
$response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}"));
|
||||
$this->getSelector($response, getenv("inven.list.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
|
||||
$date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text();
|
||||
$nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();
|
||||
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
|
||||
$link_node = $node->filter(getenv("inven.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
$this->main_process($max_limit, $listInfos);
|
||||
}
|
||||
$this->main_process($max_limit, $listInfos);
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
} catch (\Exception $e) {
|
||||
log_message("warning", sprintf(
|
||||
|
||||
@ -11,9 +11,9 @@ use App\Models\Mangboard\BoardsModel;
|
||||
|
||||
abstract class MangboardCrawler extends MyCrawler
|
||||
{
|
||||
protected $_mySocket = null;
|
||||
protected $_host = "";
|
||||
private $_board_name = "";
|
||||
private $_mySocket = null;
|
||||
private $_host = "";
|
||||
protected $_board_name = "";
|
||||
private $_user_entity = null;
|
||||
protected function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||
{
|
||||
@ -22,7 +22,8 @@ abstract class MangboardCrawler extends MyCrawler
|
||||
$this->_board_name = $board_name;
|
||||
$this->_user_entity = $user_entity;
|
||||
}
|
||||
protected function getMySocket()
|
||||
abstract public function execute(int $max_limit): void;
|
||||
final protected function getMySocket()
|
||||
{
|
||||
if ($this->_mySocket === null) {
|
||||
$this->_mySocket = new WebSocket($this->_host);
|
||||
|
||||
@ -13,9 +13,9 @@ class SirCrawler extends MangboardCrawler
|
||||
{
|
||||
parent::__construct($host, $board_name, $user_entity);
|
||||
}
|
||||
protected function changeURLByMediaType(string $url): string
|
||||
protected function changeURLByCrawler(string $url): string
|
||||
{
|
||||
return str_replace("/sir.kr/", "", parent::changeURLByMediaType($url));
|
||||
return str_replace("/sir.kr/", "", parent::changeURLByCrawler($url));
|
||||
}
|
||||
//작성내용
|
||||
// <article class="sir_vbo ">
|
||||
@ -91,8 +91,8 @@ class SirCrawler extends MangboardCrawler
|
||||
//작성내용
|
||||
$tag = getenv("sir.view.content.tag");
|
||||
$selector = $this->getSelector($response, $tag);
|
||||
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
||||
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||
if ($this->getDebug()) {
|
||||
throw new \Exception(sprintf(
|
||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||
@ -142,29 +142,32 @@ class SirCrawler extends MangboardCrawler
|
||||
public function execute(int $max_limit): void
|
||||
{
|
||||
try {
|
||||
$listInfos = [];
|
||||
if ($this->getDebug()) {
|
||||
$this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]);
|
||||
}
|
||||
$response = $this->getMySocket()->getContent(getenv("sir.list.url"));
|
||||
$this->getSelector($response, getenv("sir.list.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$link_node = $node->filter(getenv("sir.list.item.link.tag"));
|
||||
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함
|
||||
$detail_url = str_replace("/sir.kr/", "", $link_node->attr("href"));
|
||||
// $detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
$nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
|
||||
$hit = $node->filter(getenv("sir.list.item.hit.tag"))->text();
|
||||
// $date = $node->filter(getenv("sir.list.item.date.tag"))->text();
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
|
||||
$url = getenv("sir.view.test.url.{$this->_board_name}");
|
||||
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
|
||||
$this->detail_page(1, ['detail_url' => $url]);
|
||||
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
|
||||
} else {
|
||||
$listInfos = [];
|
||||
$response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->_board_name}"));
|
||||
$this->getSelector($response, getenv("sir.list.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$link_node = $node->filter(getenv("sir.list.item.link.tag"));
|
||||
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함
|
||||
$detail_url = $this->changeURLByCrawler($link_node->attr("href"));
|
||||
// $detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
$nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
|
||||
$hit = $node->filter(getenv("sir.list.item.hit.tag"))->text();
|
||||
// $date = $node->filter(getenv("sir.list.item.date.tag"))->text();
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
}
|
||||
);
|
||||
// throw new \Exception("Target URL이 없습니다." . var_export($listInfos, true));
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
$this->main_process($max_limit, $listInfos);
|
||||
}
|
||||
$this->main_process($max_limit, $listInfos);
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
} catch (\Exception $e) {
|
||||
log_message("warning", sprintf(
|
||||
|
||||
@ -16,8 +16,8 @@ class YamapCrawler extends MangboardCrawler
|
||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||
$tag = getenv("yamap.view.content.tag");
|
||||
$selector = $this->getSelector($response, $tag);
|
||||
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
||||
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||
if ($this->getDebug()) {
|
||||
throw new \Exception(sprintf(
|
||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||
@ -68,32 +68,36 @@ class YamapCrawler extends MangboardCrawler
|
||||
public function execute(int $max_limit): void
|
||||
{
|
||||
try {
|
||||
$listInfos = [];
|
||||
if ($this->getDebug()) {
|
||||
$this->detail_page(1, ['detail_url' => getenv("yamap.view.test.url")]);
|
||||
}
|
||||
$response = $this->getMySocket()->getContent(getenv("yamap.list.url"));
|
||||
$selector = $this->getSelector($response, getenv("yamap.list.tag"));
|
||||
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
|
||||
$selector->filter(getenv("yamap.list.item.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
|
||||
$date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
|
||||
$nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
|
||||
//bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
|
||||
if ($nickname != getenv("yamap.list.item.nickname.except")) {
|
||||
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
|
||||
$link_node = $node->filter(getenv("yamap.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->children()->last()->text();
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
|
||||
$url = getenv("yamap.view.test.url.{$this->_board_name}");
|
||||
log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업시작");
|
||||
$this->detail_page(1, ['detail_url' => $url]);
|
||||
log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료");
|
||||
} else {
|
||||
$listInfos = [];
|
||||
$response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_board_name}"));
|
||||
$selector = $this->getSelector($response, getenv("yamap.list.tag"));
|
||||
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
|
||||
$selector->filter(getenv("yamap.list.item.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
|
||||
$date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
|
||||
$nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
|
||||
//bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
|
||||
if ($nickname != getenv("yamap.list.item.nickname.except")) {
|
||||
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
|
||||
$link_node = $node->filter(getenv("yamap.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->children()->last()->text();
|
||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
|
||||
}
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
$this->main_process($max_limit, $listInfos);
|
||||
}
|
||||
$this->main_process($max_limit, $listInfos);
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
} catch (\Exception $e) {
|
||||
log_message("warning", sprintf(
|
||||
|
||||
@ -61,8 +61,8 @@ class YamoonCrawler extends MangboardCrawler
|
||||
//작성내용
|
||||
$tag = getenv("yamoon.view.content.tag");
|
||||
$selector = $this->getSelector($response, $tag);
|
||||
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
||||
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||
if ($this->getDebug()) {
|
||||
throw new \Exception(sprintf(
|
||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||
@ -90,25 +90,29 @@ class YamoonCrawler extends MangboardCrawler
|
||||
public function execute(int $max_limit): void
|
||||
{
|
||||
try {
|
||||
$listInfos = [];
|
||||
if ($this->getDebug()) {
|
||||
$this->detail_page(1, ['detail_url' => getenv("yamoon.view.test.url")]);
|
||||
}
|
||||
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url"));
|
||||
$this->getSelector($response, getenv("yamoon.list.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
$info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
|
||||
$infos = explode("|", $info_node->text());
|
||||
$listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
|
||||
$url = getenv("yamoon.view.test.url.{$this->_board_name}");
|
||||
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
|
||||
$this->detail_page(1, ['detail_url' => $url]);
|
||||
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
|
||||
} else {
|
||||
$listInfos = [];
|
||||
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_board_name}"));
|
||||
$this->getSelector($response, getenv("yamoon.list.tag"))->each(
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
|
||||
$detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
$info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
|
||||
$infos = explode("|", $info_node->text());
|
||||
$listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
}
|
||||
);
|
||||
if (!count($listInfos)) {
|
||||
throw new \Exception("Target URL이 없습니다.");
|
||||
$this->main_process($max_limit, $listInfos);
|
||||
}
|
||||
$this->main_process($max_limit, $listInfos);
|
||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||
} catch (\Exception $e) {
|
||||
log_message("warning", sprintf(
|
||||
|
||||
@ -33,28 +33,45 @@ abstract class MyCrawler extends CommonLibrary
|
||||
return $crawler->filter($tag);
|
||||
}
|
||||
|
||||
protected function changeURLByMediaType(string $url): string
|
||||
/**
 * Returns the part of $url that precedes the first "?" (i.e. the URL without its query string).
 *
 * @param string $url URL to trim.
 * @return string The leading non-"?" portion, or an empty string when $url is empty or
 *                starts with "?".
 */
protected function changeURLByCrawler(string $url): string
{
    // The declared return type is string: returning null on a failed match raised a
    // TypeError, so an empty string is returned instead.
    return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : "";
}
|
||||
protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
|
||||
/**
 * Reads the media URL of a single node.
 *
 * The requested attribute is read from the node itself. For 'video' nodes that do not carry
 * the attribute (e.g. <video><source src="test.mp4"></video>), the "src" attribute of the
 * first child element is used instead.
 *
 * @param Crawler $node       Node to inspect.
 * @param string  $media_type Media tag name ('video', 'img', ...).
 * @param string  $attr       Attribute holding the URL (e.g. "src").
 * @return null|string The URL, or null when no attribute value could be found.
 */
protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
{
    // <video src="test.mp4"></video>, <img src="..."> and similar.
    $url = $node->attr($attr);

    // attr() reports a missing attribute by returning null, not by throwing, so the fallback
    // has to be driven by a null check; a try/catch around attr() never reached it.
    if ($media_type === 'video' && $url === null) {
        $children = $node->children();
        // Guard against a childless <video>: attr() on an empty node list would throw.
        if ($children->count() > 0) {
            $url = $children->attr("src");
        }
    }

    return $url;
}
|
||||
protected function getUrlsByMediaType(Crawler $selector, string $media_type, string $attr, array $urls = []): array
|
||||
{
|
||||
log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_type} 작업시작--------");
|
||||
$urls[$media_type] = [];
|
||||
$selector->filter($media_type)->each(
|
||||
function (Crawler $node) use (&$media_type, &$attr, &$urls): void {
|
||||
$url = $this->getUrlByMediaType($node, $media_type, $attr);
|
||||
if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) {
|
||||
$urls[$media_type][] = $this->changeURLByMediaType($matches[0]);
|
||||
$urls[$media_type][] = $this->changeURLByCrawler($matches[0]);
|
||||
} else {
|
||||
log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]\n");
|
||||
log_message("debug", $node->html());
|
||||
log_message("debug", __FUNCTION__ . "-> {$media_type}:{$attr}\n");
|
||||
//Node 모든 속성은 DOMElement 변환 후 반환가능
|
||||
$domNode = $node->getNode(0);
|
||||
if ($domNode->hasAttributes()) {
|
||||
foreach ($domNode->attributes as $attr) {
|
||||
log_message("debug", "{$attr->nodeName} = {$attr->nodeValue}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
@ -130,7 +147,7 @@ abstract class MyCrawler extends CommonLibrary
|
||||
$max_limit = count($listInfos);
|
||||
}
|
||||
$total = count($listInfos);
|
||||
$i = $this->getDebug() ? $max_limit : 1;
|
||||
$i = 1;
|
||||
foreach ($listInfos as $listInfo) {
|
||||
if ($i <= $max_limit) {
|
||||
log_message("notice", __FUNCTION__ . " 게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작");
|
||||
|
||||
@ -66,17 +66,18 @@ class WebSocket extends MySocket
|
||||
//기본 Option
|
||||
$options['cookies'] = $this->getCookieJar(); //쿠키값
|
||||
$options['timeout'] = getenv("socket.web.timeout"); // 5초 안에 응답이 없으면 타임아웃
|
||||
log_message("debug", "Socket URL-> " . $url);
|
||||
log_message("debug", __FUNCTION__ . "=> 호출 Socket URL-> " . $url);
|
||||
return $this->getClient()->$method($url, $options);
|
||||
}
|
||||
|
||||
/**
 * Requests $url through getResponse() and returns the response body as a string.
 *
 * @param string $url     URL to request.
 * @param string $method  Client method name forwarded to getResponse().
 * @param array  $options Additional request options forwarded to getResponse().
 * @return string The response body.
 * @throws \Exception When the response status code is not 200.
 */
public function getContent(string $url, $method = "get", array $options = []): string
{
    log_message("debug", __FUNCTION__ . "=> 호출 URL:" . $url);
    $response = $this->getResponse($url, $method, $options);
    $status = $response->getStatusCode();
    if ($status == 200) {
        // Cast explicitly so the declared string return type holds regardless of the
        // body object's type.
        return (string) $response->getBody();
    }
    // Exception's second constructor argument is an int code; passing the message there
    // (with "error" as the message) raised a TypeError instead of the intended exception.
    throw new \Exception(__FUNCTION__ . "=> {$url} 접속실패: " . $status);
}
|
||||
}
|
||||
|
||||
@ -13,7 +13,7 @@ trait FileTrait
|
||||
}
|
||||
}
|
||||
|
||||
final protected function isFileType_FileTrait(string $file_ext, $type = "image"): bool
|
||||
final protected function isFileType_FileTrait(string $file_ext, $type = "img"): bool
|
||||
{
|
||||
switch ($type) {
|
||||
case "audio":
|
||||
@ -22,7 +22,7 @@ trait FileTrait
|
||||
case "video":
|
||||
$exts = ['mov', 'avi', 'mp4'];
|
||||
break;
|
||||
case "image":
|
||||
case "img":
|
||||
default:
|
||||
$exts = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
|
||||
break;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user