Automation init...3

This commit is contained in:
최준흠 2024-09-18 13:51:42 +09:00
parent 2734cfa694
commit 6a4428755e
10 changed files with 187 additions and 148 deletions

View File

@ -31,17 +31,13 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
$routes->cli('check_level/(:alpha)', 'UserController::check_level/$1'); $routes->cli('check_level/(:alpha)', 'UserController::check_level/$1');
}); });
$routes->group('crawler', function ($routes) { $routes->group('crawler', function ($routes) {
$routes->cli('yamap', 'CrawlerController::yamap'); $routes->cli('yamap/(:any)', 'CrawlerController::yamap/$1');
$routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1'); $routes->cli('yamap/(:any)/(:any)', 'CrawlerController::yamap/$1/$2');
$routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2'); $routes->cli('yamoon/(:any)', 'CrawlerController::yamoon/$1');
$routes->cli('yamoon', 'CrawlerController::yamoon'); $routes->cli('yamoon/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2');
$routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1'); $routes->cli('sir/(:any)', 'CrawlerController::sir/$1');
$routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2'); $routes->cli('sir/(:any)/(:any)', 'CrawlerController::sir/$1/$2');
$routes->cli('sir', 'CrawlerController::sir'); $routes->cli('inven/(:any)', 'CrawlerController::inven/$1');
$routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1'); $routes->cli('inven/(:any)/(:any)', 'CrawlerController::inven/$1/$2');
$routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2');
$routes->cli('inven', 'CrawlerController::inven');
$routes->cli('inven/(:alpha)', 'CrawlerController::inven/$1');
$routes->cli('inven/(:alpha)/(:any)', 'CrawlerController::inven/$1/$2');
}); });
}); });

View File

@ -47,16 +47,14 @@ class CrawlerController extends CommonController
log_message("notice", "{$id}로 로그인 성공"); log_message("notice", "{$id}로 로그인 성공");
return $user_entity; return $user_entity;
} }
public function yamap(string $id = "", string $option = ""): string public function yamap(string $board_name, ...$params): string
{ {
try { try {
//1. 사이트 로그인 처리 //1. 사이트 로그인 처리
$user_entity = $this->login($id); $user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamapCrawler(getenv('yamap.host.url'), getenv("yamap.host.board_name"), $user_entity); $crawler = new YamapCrawler(getenv('yamap.host.url'), $board_name, $user_entity);
if ($option) { $crawler->setDebug(in_array('debug', $params));
$crawler->setDebug($option === "debug" ? true : false);
}
$crawler->execute(intval(getenv("yamap.list.max_limit"))); $crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {
@ -64,33 +62,29 @@ class CrawlerController extends CommonController
return $e->getMessage(); return $e->getMessage();
} }
} }
public function yamoon(string $id = "", string $option = ""): string public function yamoon(string $board_name, ...$params): string
{ {
try { try {
//1. 사이트 로그인 처리 //1. 사이트 로그인 처리
$user_entity = $this->login($id); $user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), getenv("yamoon.host.board_name"), $user_entity); $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $board_name, $user_entity);
if ($option) { $crawler->setDebug(in_array('debug', $params));
$crawler->setDebug($option === "debug" ? true : false); $crawler->execute(intval(getenv("yamoon.list.max_limit")));
}
$crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {
log_message("error", $e->getMessage()); log_message("error", $e->getMessage());
return $e->getMessage(); return $e->getMessage();
} }
} }
public function sir(string $id = "", string $option = ""): string public function sir(string $board_name, ...$params): string
{ {
try { try {
//1. 사이트 로그인 처리 //1. 사이트 로그인 처리
$user_entity = $this->login($id); $user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new SirCrawler(getenv("sir.host.url"), getenv("sir.host.board_name"), $user_entity); $crawler = new SirCrawler(getenv("sir.host.url"), $board_name, $user_entity);
if ($option) { $crawler->setDebug(in_array('debug', $params));
$crawler->setDebug($option === "debug" ? true : false);
}
$crawler->execute(intval(getenv("sir.list.max_limit"))); $crawler->execute(intval(getenv("sir.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {
@ -98,16 +92,14 @@ class CrawlerController extends CommonController
return $e->getMessage(); return $e->getMessage();
} }
} }
public function inven(string $id = "", string $option = ""): string public function inven(string $board_name, ...$params): string
{ {
try { try {
//1. 사이트 로그인 처리 //1. 사이트 로그인 처리
$user_entity = $this->login($id); $user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new InvenCrawler(getenv("inven.host.url"), getenv("inven.host.board_name"), $user_entity); $crawler = new InvenCrawler(getenv("inven.host.url"), $board_name, $user_entity);
if ($option) { $crawler->setDebug(in_array('debug', $params));
$crawler->setDebug($option === "debug" ? true : false);
}
$crawler->execute(intval(getenv("inven.list.max_limit"))); $crawler->execute(intval(getenv("inven.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {

View File

@ -11,6 +11,24 @@ class InvenCrawler extends MangboardCrawler
{ {
parent::__construct($host, $board_name, $user_entity); parent::__construct($host, $board_name, $user_entity);
} }
/**
 * Resolves the media URL of a single node, with an extra `data-src` fallback for videos.
 *
 * The parent implementation is always consulted first. Only for 'video' nodes, and only
 * when the parent produced no usable URL (null or empty string), is the node's
 * `data-src` attribute tried. Every other media type is delegated to the parent
 * unchanged.
 *
 * @param Crawler $node       Node to read the URL from.
 * @param string  $media_type Media tag name being processed (e.g. 'img', 'video').
 * @param string  $attr       Attribute that normally carries the URL.
 *
 * @return null|string The resolved URL, or whatever the parent returned when no
 *                     non-empty `data-src` value is available.
 */
protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
{
    $url = parent::getUrlByMediaType($node, $media_type, $attr);

    // Non-video media, or a video whose URL was already found: keep the parent's result.
    // Previously the data-src lookup ran unconditionally and could replace a valid URL.
    if ($media_type !== 'video' || ($url !== null && $url !== '')) {
        return $url;
    }

    // extract() yields one entry per node; that entry is an empty string when the
    // attribute is missing, so an empty value must not be treated as a hit.
    $candidates = $node->extract(['data-src']);
    $fallback = $candidates[0] ?? null;

    return (is_string($fallback) && $fallback !== '') ? $fallback : $url;
}
//작성내용 //작성내용
// <div class="articleContent"> // <div class="articleContent">
// <div id="imageCollectDiv" class="contentBody"> // <div id="imageCollectDiv" class="contentBody">
@ -33,8 +51,8 @@ class InvenCrawler extends MangboardCrawler
$response = $this->getMySocket()->getContent($listInfo['detail_url']); $response = $this->getMySocket()->getContent($listInfo['detail_url']);
$tag = getenv("inven.view.content.tag"); $tag = getenv("inven.view.content.tag");
$selector = $this->getSelector($response, $tag); $selector = $this->getSelector($response, $tag);
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->getDebug()) { if ($this->getDebug()) {
throw new \Exception(sprintf( throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
@ -84,28 +102,31 @@ class InvenCrawler extends MangboardCrawler
public function execute(int $max_limit): void public function execute(int $max_limit): void
{ {
try { try {
$listInfos = [];
if ($this->getDebug()) { if ($this->getDebug()) {
$this->detail_page(1, ['detail_url' => getenv("inven.view.test.url")]); $url = getenv("inven.view.test.url.{$this->_board_name}");
} log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
$response = $this->getMySocket()->getContent(getenv("inven.list.url")); $this->detail_page(1, ['detail_url' => $url]);
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
$this->getSelector($response, getenv("inven.list.tag"))->each( } else {
function (Crawler $node) use (&$listInfos): void { $listInfos = [];
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text(); $response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}"));
$date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text(); $this->getSelector($response, getenv("inven.list.tag"))->each(
$nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text(); function (Crawler $node) use (&$listInfos): void {
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
$link_node = $node->filter(getenv("inven.list.item.link.tag")); $date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text();
$detail_url = $link_node->attr("href"); $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();
$title = $link_node->text(); //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; $link_node = $node->filter(getenv("inven.list.item.link.tag"));
$detail_url = $link_node->attr("href");
$title = $link_node->text();
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
}
);
if (!count($listInfos)) {
throw new \Exception("Target URL이 없습니다.");
} }
); $this->main_process($max_limit, $listInfos);
if (!count($listInfos)) {
throw new \Exception("Target URL이 없습니다.");
} }
$this->main_process($max_limit, $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) { } catch (\Exception $e) {
log_message("warning", sprintf( log_message("warning", sprintf(

View File

@ -11,9 +11,9 @@ use App\Models\Mangboard\BoardsModel;
abstract class MangboardCrawler extends MyCrawler abstract class MangboardCrawler extends MyCrawler
{ {
protected $_mySocket = null; private $_mySocket = null;
protected $_host = ""; private $_host = "";
private $_board_name = ""; protected $_board_name = "";
private $_user_entity = null; private $_user_entity = null;
protected function __construct(string $host, string $board_name, UserEntity $user_entity) protected function __construct(string $host, string $board_name, UserEntity $user_entity)
{ {
@ -22,7 +22,8 @@ abstract class MangboardCrawler extends MyCrawler
$this->_board_name = $board_name; $this->_board_name = $board_name;
$this->_user_entity = $user_entity; $this->_user_entity = $user_entity;
} }
protected function getMySocket() abstract public function execute(int $max_limit): void;
final protected function getMySocket()
{ {
if ($this->_mySocket === null) { if ($this->_mySocket === null) {
$this->_mySocket = new WebSocket($this->_host); $this->_mySocket = new WebSocket($this->_host);

View File

@ -13,9 +13,9 @@ class SirCrawler extends MangboardCrawler
{ {
parent::__construct($host, $board_name, $user_entity); parent::__construct($host, $board_name, $user_entity);
} }
protected function changeURLByMediaType(string $url): string protected function changeURLByCrawler(string $url): string
{ {
return str_replace("/sir.kr/", "", parent::changeURLByMediaType($url)); return str_replace("/sir.kr/", "", parent::changeURLByCrawler($url));
} }
//작성내용 //작성내용
// <article class="sir_vbo "> // <article class="sir_vbo ">
@ -91,8 +91,8 @@ class SirCrawler extends MangboardCrawler
//작성내용 //작성내용
$tag = getenv("sir.view.content.tag"); $tag = getenv("sir.view.content.tag");
$selector = $this->getSelector($response, $tag); $selector = $this->getSelector($response, $tag);
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->getDebug()) { if ($this->getDebug()) {
throw new \Exception(sprintf( throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
@ -142,29 +142,32 @@ class SirCrawler extends MangboardCrawler
public function execute(int $max_limit): void public function execute(int $max_limit): void
{ {
try { try {
$listInfos = [];
if ($this->getDebug()) { if ($this->getDebug()) {
$this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]); $url = getenv("sir.view.test.url.{$this->_board_name}");
} log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
$response = $this->getMySocket()->getContent(getenv("sir.list.url")); $this->detail_page(1, ['detail_url' => $url]);
$this->getSelector($response, getenv("sir.list.tag"))->each( log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
function (Crawler $node) use (&$listInfos): void { } else {
$link_node = $node->filter(getenv("sir.list.item.link.tag")); $listInfos = [];
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함 $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->_board_name}"));
$detail_url = str_replace("/sir.kr/", "", $link_node->attr("href")); $this->getSelector($response, getenv("sir.list.tag"))->each(
// $detail_url = $link_node->attr("href"); function (Crawler $node) use (&$listInfos): void {
$title = $link_node->text(); $link_node = $node->filter(getenv("sir.list.item.link.tag"));
$nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text(); // href url의 맨 앞이 /가 두개라서 한개를 빼기위함
$hit = $node->filter(getenv("sir.list.item.hit.tag"))->text(); $detail_url = $this->changeURLByCrawler($link_node->attr("href"));
// $date = $node->filter(getenv("sir.list.item.date.tag"))->text(); // $detail_url = $link_node->attr("href");
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit]; $title = $link_node->text();
$nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
$hit = $node->filter(getenv("sir.list.item.hit.tag"))->text();
// $date = $node->filter(getenv("sir.list.item.date.tag"))->text();
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
}
);
if (!count($listInfos)) {
throw new \Exception("Target URL이 없습니다.");
} }
); $this->main_process($max_limit, $listInfos);
// throw new \Exception("Target URL이 없습니다." . var_export($listInfos, true));
if (!count($listInfos)) {
throw new \Exception("Target URL이 없습니다.");
} }
$this->main_process($max_limit, $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) { } catch (\Exception $e) {
log_message("warning", sprintf( log_message("warning", sprintf(

View File

@ -16,8 +16,8 @@ class YamapCrawler extends MangboardCrawler
$response = $this->getMySocket()->getContent($listInfo['detail_url']); $response = $this->getMySocket()->getContent($listInfo['detail_url']);
$tag = getenv("yamap.view.content.tag"); $tag = getenv("yamap.view.content.tag");
$selector = $this->getSelector($response, $tag); $selector = $this->getSelector($response, $tag);
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->getDebug()) { if ($this->getDebug()) {
throw new \Exception(sprintf( throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
@ -68,32 +68,36 @@ class YamapCrawler extends MangboardCrawler
public function execute(int $max_limit): void public function execute(int $max_limit): void
{ {
try { try {
$listInfos = [];
if ($this->getDebug()) { if ($this->getDebug()) {
$this->detail_page(1, ['detail_url' => getenv("yamap.view.test.url")]); $url = getenv("yamap.view.test.url.{$this->_board_name}");
} log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업시작");
$response = $this->getMySocket()->getContent(getenv("yamap.list.url")); $this->detail_page(1, ['detail_url' => $url]);
$selector = $this->getSelector($response, getenv("yamap.list.tag")); log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료");
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 } else {
$selector->filter(getenv("yamap.list.item.tag"))->each( $listInfos = [];
function (Crawler $node) use (&$listInfos): void { $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_board_name}"));
$hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text(); $selector = $this->getSelector($response, getenv("yamap.list.tag"));
$date = $node->filter(getenv("yamap.list.item.date.tag"))->text(); //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
$nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text(); $selector->filter(getenv("yamap.list.item.tag"))->each(
//bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool function (Crawler $node) use (&$listInfos): void {
if ($nickname != getenv("yamap.list.item.nickname.except")) { $hit = $node->filter(getenv("yamap.list.item.hit.tag"))->text();
//작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서 $date = $node->filter(getenv("yamap.list.item.date.tag"))->text();
$link_node = $node->filter(getenv("yamap.list.item.link.tag")); $nickname = $node->filter(getenv("yamap.list.item.nickname.tag"))->text();
$detail_url = $link_node->attr("href"); //bbs_item에서 span.g_nickname 객체를 찾아서 작성자가 "관리자" 아닌지 확인 후 Return Bool
$title = $link_node->children()->last()->text(); if ($nickname != getenv("yamap.list.item.nickname.except")) {
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit]; //작성자가 "관리자"가 아닌 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
$link_node = $node->filter(getenv("yamap.list.item.link.tag"));
$detail_url = $link_node->attr("href");
$title = $link_node->children()->last()->text();
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
}
} }
);
if (!count($listInfos)) {
throw new \Exception("Target URL이 없습니다.");
} }
); $this->main_process($max_limit, $listInfos);
if (!count($listInfos)) {
throw new \Exception("Target URL이 없습니다.");
} }
$this->main_process($max_limit, $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) { } catch (\Exception $e) {
log_message("warning", sprintf( log_message("warning", sprintf(

View File

@ -61,8 +61,8 @@ class YamoonCrawler extends MangboardCrawler
//작성내용 //작성내용
$tag = getenv("yamoon.view.content.tag"); $tag = getenv("yamoon.view.content.tag");
$selector = $this->getSelector($response, $tag); $selector = $this->getSelector($response, $tag);
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]); $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls); $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->getDebug()) { if ($this->getDebug()) {
throw new \Exception(sprintf( throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
@ -90,25 +90,29 @@ class YamoonCrawler extends MangboardCrawler
public function execute(int $max_limit): void public function execute(int $max_limit): void
{ {
try { try {
$listInfos = [];
if ($this->getDebug()) { if ($this->getDebug()) {
$this->detail_page(1, ['detail_url' => getenv("yamoon.view.test.url")]); $url = getenv("yamoon.view.test.url.{$this->_board_name}");
} log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url")); $this->detail_page(1, ['detail_url' => $url]);
$this->getSelector($response, getenv("yamoon.list.tag"))->each( log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
function (Crawler $node) use (&$listInfos): void { } else {
$link_node = $node->filter(getenv("yamoon.list.item.link.tag")); $listInfos = [];
$detail_url = $link_node->attr("href"); $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_board_name}"));
$title = $link_node->text(); $this->getSelector($response, getenv("yamoon.list.tag"))->each(
$info_node = $node->filter(getenv("yamoon.list.item.info.tag")); function (Crawler $node) use (&$listInfos): void {
$infos = explode("|", $info_node->text()); $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
$listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])]; $detail_url = $link_node->attr("href");
$title = $link_node->text();
$info_node = $node->filter(getenv("yamoon.list.item.info.tag"));
$infos = explode("|", $info_node->text());
$listInfos[] = ['title' => $title, 'detail_url' => $detail_url, 'nickname' => trim($infos[0]), 'hit' => trim($infos[2]), 'date' => trim($infos[4])];
}
);
if (!count($listInfos)) {
throw new \Exception("Target URL이 없습니다.");
} }
); $this->main_process($max_limit, $listInfos);
if (!count($listInfos)) {
throw new \Exception("Target URL이 없습니다.");
} }
$this->main_process($max_limit, $listInfos);
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
} catch (\Exception $e) { } catch (\Exception $e) {
log_message("warning", sprintf( log_message("warning", sprintf(

View File

@ -33,28 +33,45 @@ abstract class MyCrawler extends CommonLibrary
return $crawler->filter($tag); return $crawler->filter($tag);
} }
protected function changeURLByMediaType(string $url): string protected function changeURLByCrawler(string $url): string
{ {
return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : null; return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : null;
} }
protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
{ {
$urls[$media_type] = []; switch ($media_type) {
$selector->filter($options["tag"])->each( case 'video':
function (Crawler $node) use (&$media_type, &$options, &$urls): void { try {
$url = $node->attr($options["attr"]); $url = $node->attr($attr); //<video src="test.mp4"></video> 또는 <video data-src="test.mp4"></video>
switch ($media_type) { } catch (\Exception) {
case 'video': $url = $node->children()->attr("src"); //<video><source src="test.mp4"></source</video>
if ($url === null) {
$url = $node->children()->attr("src");
}
break;
} }
break;
case 'img':
default:
$url = $node->attr($attr);
break;
}
return $url;
}
protected function getUrlsByMediaType(Crawler $selector, string $media_type, string $attr, array $urls = []): array
{
log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_type} 작업시작--------");
$urls[$media_type] = [];
$selector->filter($media_type)->each(
function (Crawler $node) use (&$media_type, &$attr, &$urls): void {
$url = $this->getUrlByMediaType($node, $media_type, $attr);
if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) { if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) {
$urls[$media_type][] = $this->changeURLByMediaType($matches[0]); $urls[$media_type][] = $this->changeURLByCrawler($matches[0]);
} else { } else {
log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]\n"); log_message("debug", __FUNCTION__ . "-> {$media_type}:{$attr}\n");
log_message("debug", $node->html()); //Node 모든 속성은 DOMElement 변환 후 반환가능
$domNode = $node->getNode(0);
if ($domNode->hasAttributes()) {
foreach ($domNode->attributes as $attr) {
log_message("debug", "{$attr->nodeName} = {$attr->nodeValue}");
}
}
} }
} }
); );
@ -130,7 +147,7 @@ abstract class MyCrawler extends CommonLibrary
$max_limit = count($listInfos); $max_limit = count($listInfos);
} }
$total = count($listInfos); $total = count($listInfos);
$i = $this->getDebug() ? $max_limit : 1; $i = 1;
foreach ($listInfos as $listInfo) { foreach ($listInfos as $listInfo) {
if ($i <= $max_limit) { if ($i <= $max_limit) {
log_message("notice", __FUNCTION__ . " 게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작"); log_message("notice", __FUNCTION__ . " 게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작");

View File

@ -66,17 +66,18 @@ class WebSocket extends MySocket
//기본 Option //기본 Option
$options['cookies'] = $this->getCookieJar(); //쿠키값 $options['cookies'] = $this->getCookieJar(); //쿠키값
$options['timeout'] = getenv("socket.web.timeout"); // 5초 안에 응답이 없으면 타임아웃 $options['timeout'] = getenv("socket.web.timeout"); // 5초 안에 응답이 없으면 타임아웃
log_message("debug", "Socket URL-> " . $url); log_message("debug", __FUNCTION__ . "=> 호출 Socket URL-> " . $url);
return $this->getClient()->$method($url, $options); return $this->getClient()->$method($url, $options);
} }
public function getContent(string $url, $method = "get", array $options = []): string public function getContent(string $url, $method = "get", array $options = []): string
{ {
log_message("debug", __FUNCTION__ . "=> 호출 URL:" . $url);
$response = $this->getResponse($url, $method, $options); $response = $this->getResponse($url, $method, $options);
if ($response->getStatusCode() == 200) { if ($response->getStatusCode() == 200) {
// return $response->getBody()->getContents(); // return $response->getBody()->getContents();
return $response->getBody(); return $response->getBody();
} }
throw new \Exception("error", "{$url} 접속실패: " . $response->getStatusCode()); throw new \Exception("error", __FUNCTION__ . "=> {$url} 접속실패: " . $response->getStatusCode());
} }
} }

View File

@ -13,7 +13,7 @@ trait FileTrait
} }
} }
final protected function isFileType_FileTrait(string $file_ext, $type = "image"): bool final protected function isFileType_FileTrait(string $file_ext, $type = "img"): bool
{ {
switch ($type) { switch ($type) {
case "audio": case "audio":
@ -22,7 +22,7 @@ trait FileTrait
case "video": case "video":
$exts = ['mov', 'avi', 'mp4']; $exts = ['mov', 'avi', 'mp4'];
break; break;
case "image": case "img":
default: default:
$exts = ['jpg', 'jpeg', 'png', 'gif', 'webp']; $exts = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
break; break;