Automation init...3
This commit is contained in:
parent
2734cfa694
commit
6a4428755e
@ -31,17 +31,13 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
|
|||||||
$routes->cli('check_level/(:alpha)', 'UserController::check_level/$1');
|
$routes->cli('check_level/(:alpha)', 'UserController::check_level/$1');
|
||||||
});
|
});
|
||||||
$routes->group('crawler', function ($routes) {
|
$routes->group('crawler', function ($routes) {
|
||||||
$routes->cli('yamap', 'CrawlerController::yamap');
|
$routes->cli('yamap/(:any)', 'CrawlerController::yamap/$1');
|
||||||
$routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1');
|
$routes->cli('yamap/(:any)/(:any)', 'CrawlerController::yamap/$1/$2');
|
||||||
$routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2');
|
$routes->cli('yamoon/(:any)', 'CrawlerController::yamoon/$1');
|
||||||
$routes->cli('yamoon', 'CrawlerController::yamoon');
|
$routes->cli('yamoon/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2');
|
||||||
$routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1');
|
$routes->cli('sir/(:any)', 'CrawlerController::sir/$1');
|
||||||
$routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2');
|
$routes->cli('sir/(:any)/(:any)', 'CrawlerController::sir/$1/$2');
|
||||||
$routes->cli('sir', 'CrawlerController::sir');
|
$routes->cli('inven/(:any)', 'CrawlerController::inven/$1');
|
||||||
$routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1');
|
$routes->cli('inven/(:any)/(:any)', 'CrawlerController::inven/$1/$2');
|
||||||
$routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2');
|
|
||||||
$routes->cli('inven', 'CrawlerController::inven');
|
|
||||||
$routes->cli('inven/(:alpha)', 'CrawlerController::inven/$1');
|
|
||||||
$routes->cli('inven/(:alpha)/(:any)', 'CrawlerController::inven/$1/$2');
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -47,16 +47,14 @@ class CrawlerController extends CommonController
|
|||||||
log_message("notice", "{$id}로 로그인 성공");
|
log_message("notice", "{$id}로 로그인 성공");
|
||||||
return $user_entity;
|
return $user_entity;
|
||||||
}
|
}
|
||||||
public function yamap(string $id = "", string $option = ""): string
|
public function yamap(string $board_name, ...$params): string
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
//1. 사이트 로그인 처리
|
//1. 사이트 로그인 처리
|
||||||
$user_entity = $this->login($id);
|
$user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
|
||||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||||
$crawler = new YamapCrawler(getenv('yamap.host.url'), getenv("yamap.host.board_name"), $user_entity);
|
$crawler = new YamapCrawler(getenv('yamap.host.url'), $board_name, $user_entity);
|
||||||
if ($option) {
|
$crawler->setDebug(in_array('debug', $params));
|
||||||
$crawler->setDebug($option === "debug" ? true : false);
|
|
||||||
}
|
|
||||||
$crawler->execute(intval(getenv("yamap.list.max_limit")));
|
$crawler->execute(intval(getenv("yamap.list.max_limit")));
|
||||||
return "완료되었습니다.";
|
return "완료되었습니다.";
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
@ -64,33 +62,29 @@ class CrawlerController extends CommonController
|
|||||||
return $e->getMessage();
|
return $e->getMessage();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public function yamoon(string $id = "", string $option = ""): string
|
public function yamoon(string $board_name, ...$params): string
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
//1. 사이트 로그인 처리
|
//1. 사이트 로그인 처리
|
||||||
$user_entity = $this->login($id);
|
$user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
|
||||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||||
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), getenv("yamoon.host.board_name"), $user_entity);
|
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $board_name, $user_entity);
|
||||||
if ($option) {
|
$crawler->setDebug(in_array('debug', $params));
|
||||||
$crawler->setDebug($option === "debug" ? true : false);
|
$crawler->execute(intval(getenv("yamoon.list.max_limit")));
|
||||||
}
|
|
||||||
$crawler->execute(intval(getenv("yamap.list.max_limit")));
|
|
||||||
return "완료되었습니다.";
|
return "완료되었습니다.";
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
log_message("error", $e->getMessage());
|
log_message("error", $e->getMessage());
|
||||||
return $e->getMessage();
|
return $e->getMessage();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public function sir(string $id = "", string $option = ""): string
|
public function sir(string $board_name, ...$params): string
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
//1. 사이트 로그인 처리
|
//1. 사이트 로그인 처리
|
||||||
$user_entity = $this->login($id);
|
$user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
|
||||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||||
$crawler = new SirCrawler(getenv("sir.host.url"), getenv("sir.host.board_name"), $user_entity);
|
$crawler = new SirCrawler(getenv("sir.host.url"), $board_name, $user_entity);
|
||||||
if ($option) {
|
$crawler->setDebug(in_array('debug', $params));
|
||||||
$crawler->setDebug($option === "debug" ? true : false);
|
|
||||||
}
|
|
||||||
$crawler->execute(intval(getenv("sir.list.max_limit")));
|
$crawler->execute(intval(getenv("sir.list.max_limit")));
|
||||||
return "완료되었습니다.";
|
return "완료되었습니다.";
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
@ -98,16 +92,14 @@ class CrawlerController extends CommonController
|
|||||||
return $e->getMessage();
|
return $e->getMessage();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public function inven(string $id = "", string $option = ""): string
|
public function inven(string $board_name, ...$params): string
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
//1. 사이트 로그인 처리
|
//1. 사이트 로그인 처리
|
||||||
$user_entity = $this->login($id);
|
$user_entity = $this->login(in_array('id', $params) ? $params['id'] : "");
|
||||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||||
$crawler = new InvenCrawler(getenv("inven.host.url"), getenv("inven.host.board_name"), $user_entity);
|
$crawler = new InvenCrawler(getenv("inven.host.url"), $board_name, $user_entity);
|
||||||
if ($option) {
|
$crawler->setDebug(in_array('debug', $params));
|
||||||
$crawler->setDebug($option === "debug" ? true : false);
|
|
||||||
}
|
|
||||||
$crawler->execute(intval(getenv("inven.list.max_limit")));
|
$crawler->execute(intval(getenv("inven.list.max_limit")));
|
||||||
return "완료되었습니다.";
|
return "완료되었습니다.";
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
|
|||||||
@ -11,6 +11,24 @@ class InvenCrawler extends MangboardCrawler
|
|||||||
{
|
{
|
||||||
parent::__construct($host, $board_name, $user_entity);
|
parent::__construct($host, $board_name, $user_entity);
|
||||||
}
|
}
|
||||||
|
protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
|
||||||
|
{
|
||||||
|
switch ($media_type) {
|
||||||
|
case 'video':
|
||||||
|
$url = parent::getUrlByMediaType($node, $media_type, $attr);
|
||||||
|
//그래도 null이면 data-src로 추출해본다.
|
||||||
|
$attributes = $node->extract(['data-src']);
|
||||||
|
if (count($attributes)) {
|
||||||
|
$url = $attributes[0];
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'img':
|
||||||
|
default:
|
||||||
|
$url = parent::getUrlByMediaType($node, $media_type, $attr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return $url;
|
||||||
|
}
|
||||||
//작성내용
|
//작성내용
|
||||||
// <div class="articleContent">
|
// <div class="articleContent">
|
||||||
// <div id="imageCollectDiv" class="contentBody">
|
// <div id="imageCollectDiv" class="contentBody">
|
||||||
@ -33,8 +51,8 @@ class InvenCrawler extends MangboardCrawler
|
|||||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||||
$tag = getenv("inven.view.content.tag");
|
$tag = getenv("inven.view.content.tag");
|
||||||
$selector = $this->getSelector($response, $tag);
|
$selector = $this->getSelector($response, $tag);
|
||||||
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||||
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
|
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
throw new \Exception(sprintf(
|
throw new \Exception(sprintf(
|
||||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||||
@ -84,12 +102,14 @@ class InvenCrawler extends MangboardCrawler
|
|||||||
public function execute(int $max_limit): void
|
public function execute(int $max_limit): void
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
$listInfos = [];
|
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
$this->detail_page(1, ['detail_url' => getenv("inven.view.test.url")]);
|
$url = getenv("inven.view.test.url.{$this->_board_name}");
|
||||||
}
|
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
|
||||||
$response = $this->getMySocket()->getContent(getenv("inven.list.url"));
|
$this->detail_page(1, ['detail_url' => $url]);
|
||||||
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
|
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
|
||||||
|
} else {
|
||||||
|
$listInfos = [];
|
||||||
|
$response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}"));
|
||||||
$this->getSelector($response, getenv("inven.list.tag"))->each(
|
$this->getSelector($response, getenv("inven.list.tag"))->each(
|
||||||
function (Crawler $node) use (&$listInfos): void {
|
function (Crawler $node) use (&$listInfos): void {
|
||||||
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
|
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
|
||||||
@ -106,6 +126,7 @@ class InvenCrawler extends MangboardCrawler
|
|||||||
throw new \Exception("Target URL이 없습니다.");
|
throw new \Exception("Target URL이 없습니다.");
|
||||||
}
|
}
|
||||||
$this->main_process($max_limit, $listInfos);
|
$this->main_process($max_limit, $listInfos);
|
||||||
|
}
|
||||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
log_message("warning", sprintf(
|
log_message("warning", sprintf(
|
||||||
|
|||||||
@ -11,9 +11,9 @@ use App\Models\Mangboard\BoardsModel;
|
|||||||
|
|
||||||
abstract class MangboardCrawler extends MyCrawler
|
abstract class MangboardCrawler extends MyCrawler
|
||||||
{
|
{
|
||||||
protected $_mySocket = null;
|
private $_mySocket = null;
|
||||||
protected $_host = "";
|
private $_host = "";
|
||||||
private $_board_name = "";
|
protected $_board_name = "";
|
||||||
private $_user_entity = null;
|
private $_user_entity = null;
|
||||||
protected function __construct(string $host, string $board_name, UserEntity $user_entity)
|
protected function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||||
{
|
{
|
||||||
@ -22,7 +22,8 @@ abstract class MangboardCrawler extends MyCrawler
|
|||||||
$this->_board_name = $board_name;
|
$this->_board_name = $board_name;
|
||||||
$this->_user_entity = $user_entity;
|
$this->_user_entity = $user_entity;
|
||||||
}
|
}
|
||||||
protected function getMySocket()
|
abstract public function execute(int $max_limit): void;
|
||||||
|
final protected function getMySocket()
|
||||||
{
|
{
|
||||||
if ($this->_mySocket === null) {
|
if ($this->_mySocket === null) {
|
||||||
$this->_mySocket = new WebSocket($this->_host);
|
$this->_mySocket = new WebSocket($this->_host);
|
||||||
|
|||||||
@ -13,9 +13,9 @@ class SirCrawler extends MangboardCrawler
|
|||||||
{
|
{
|
||||||
parent::__construct($host, $board_name, $user_entity);
|
parent::__construct($host, $board_name, $user_entity);
|
||||||
}
|
}
|
||||||
protected function changeURLByMediaType(string $url): string
|
protected function changeURLByCrawler(string $url): string
|
||||||
{
|
{
|
||||||
return str_replace("/sir.kr/", "", parent::changeURLByMediaType($url));
|
return str_replace("/sir.kr/", "", parent::changeURLByCrawler($url));
|
||||||
}
|
}
|
||||||
//작성내용
|
//작성내용
|
||||||
// <article class="sir_vbo ">
|
// <article class="sir_vbo ">
|
||||||
@ -91,8 +91,8 @@ class SirCrawler extends MangboardCrawler
|
|||||||
//작성내용
|
//작성내용
|
||||||
$tag = getenv("sir.view.content.tag");
|
$tag = getenv("sir.view.content.tag");
|
||||||
$selector = $this->getSelector($response, $tag);
|
$selector = $this->getSelector($response, $tag);
|
||||||
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||||
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
|
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
throw new \Exception(sprintf(
|
throw new \Exception(sprintf(
|
||||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||||
@ -142,16 +142,19 @@ class SirCrawler extends MangboardCrawler
|
|||||||
public function execute(int $max_limit): void
|
public function execute(int $max_limit): void
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
$listInfos = [];
|
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
$this->detail_page(1, ['detail_url' => getenv("sir.view.test.url")]);
|
$url = getenv("sir.view.test.url.{$this->_board_name}");
|
||||||
}
|
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
|
||||||
$response = $this->getMySocket()->getContent(getenv("sir.list.url"));
|
$this->detail_page(1, ['detail_url' => $url]);
|
||||||
|
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
|
||||||
|
} else {
|
||||||
|
$listInfos = [];
|
||||||
|
$response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->_board_name}"));
|
||||||
$this->getSelector($response, getenv("sir.list.tag"))->each(
|
$this->getSelector($response, getenv("sir.list.tag"))->each(
|
||||||
function (Crawler $node) use (&$listInfos): void {
|
function (Crawler $node) use (&$listInfos): void {
|
||||||
$link_node = $node->filter(getenv("sir.list.item.link.tag"));
|
$link_node = $node->filter(getenv("sir.list.item.link.tag"));
|
||||||
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함
|
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함
|
||||||
$detail_url = str_replace("/sir.kr/", "", $link_node->attr("href"));
|
$detail_url = $this->changeURLByCrawler($link_node->attr("href"));
|
||||||
// $detail_url = $link_node->attr("href");
|
// $detail_url = $link_node->attr("href");
|
||||||
$title = $link_node->text();
|
$title = $link_node->text();
|
||||||
$nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
|
$nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
|
||||||
@ -160,11 +163,11 @@ class SirCrawler extends MangboardCrawler
|
|||||||
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
|
$listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => "", 'hit' => $hit];
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
// throw new \Exception("Target URL이 없습니다." . var_export($listInfos, true));
|
|
||||||
if (!count($listInfos)) {
|
if (!count($listInfos)) {
|
||||||
throw new \Exception("Target URL이 없습니다.");
|
throw new \Exception("Target URL이 없습니다.");
|
||||||
}
|
}
|
||||||
$this->main_process($max_limit, $listInfos);
|
$this->main_process($max_limit, $listInfos);
|
||||||
|
}
|
||||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
log_message("warning", sprintf(
|
log_message("warning", sprintf(
|
||||||
|
|||||||
@ -16,8 +16,8 @@ class YamapCrawler extends MangboardCrawler
|
|||||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||||
$tag = getenv("yamap.view.content.tag");
|
$tag = getenv("yamap.view.content.tag");
|
||||||
$selector = $this->getSelector($response, $tag);
|
$selector = $this->getSelector($response, $tag);
|
||||||
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||||
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
|
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
throw new \Exception(sprintf(
|
throw new \Exception(sprintf(
|
||||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||||
@ -68,11 +68,14 @@ class YamapCrawler extends MangboardCrawler
|
|||||||
public function execute(int $max_limit): void
|
public function execute(int $max_limit): void
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
$listInfos = [];
|
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
$this->detail_page(1, ['detail_url' => getenv("yamap.view.test.url")]);
|
$url = getenv("yamap.view.test.url.{$this->_board_name}");
|
||||||
}
|
log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업시작");
|
||||||
$response = $this->getMySocket()->getContent(getenv("yamap.list.url"));
|
$this->detail_page(1, ['detail_url' => $url]);
|
||||||
|
log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료");
|
||||||
|
} else {
|
||||||
|
$listInfos = [];
|
||||||
|
$response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_board_name}"));
|
||||||
$selector = $this->getSelector($response, getenv("yamap.list.tag"));
|
$selector = $this->getSelector($response, getenv("yamap.list.tag"));
|
||||||
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
|
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
|
||||||
$selector->filter(getenv("yamap.list.item.tag"))->each(
|
$selector->filter(getenv("yamap.list.item.tag"))->each(
|
||||||
@ -94,6 +97,7 @@ class YamapCrawler extends MangboardCrawler
|
|||||||
throw new \Exception("Target URL이 없습니다.");
|
throw new \Exception("Target URL이 없습니다.");
|
||||||
}
|
}
|
||||||
$this->main_process($max_limit, $listInfos);
|
$this->main_process($max_limit, $listInfos);
|
||||||
|
}
|
||||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
log_message("warning", sprintf(
|
log_message("warning", sprintf(
|
||||||
|
|||||||
@ -61,8 +61,8 @@ class YamoonCrawler extends MangboardCrawler
|
|||||||
//작성내용
|
//작성내용
|
||||||
$tag = getenv("yamoon.view.content.tag");
|
$tag = getenv("yamoon.view.content.tag");
|
||||||
$selector = $this->getSelector($response, $tag);
|
$selector = $this->getSelector($response, $tag);
|
||||||
$media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
|
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
|
||||||
$media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);
|
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
throw new \Exception(sprintf(
|
throw new \Exception(sprintf(
|
||||||
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
|
||||||
@ -90,11 +90,14 @@ class YamoonCrawler extends MangboardCrawler
|
|||||||
public function execute(int $max_limit): void
|
public function execute(int $max_limit): void
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
$listInfos = [];
|
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
$this->detail_page(1, ['detail_url' => getenv("yamoon.view.test.url")]);
|
$url = getenv("yamoon.view.test.url.{$this->_board_name}");
|
||||||
}
|
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업시작");
|
||||||
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url"));
|
$this->detail_page(1, ['detail_url' => $url]);
|
||||||
|
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
|
||||||
|
} else {
|
||||||
|
$listInfos = [];
|
||||||
|
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_board_name}"));
|
||||||
$this->getSelector($response, getenv("yamoon.list.tag"))->each(
|
$this->getSelector($response, getenv("yamoon.list.tag"))->each(
|
||||||
function (Crawler $node) use (&$listInfos): void {
|
function (Crawler $node) use (&$listInfos): void {
|
||||||
$link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
|
$link_node = $node->filter(getenv("yamoon.list.item.link.tag"));
|
||||||
@ -109,6 +112,7 @@ class YamoonCrawler extends MangboardCrawler
|
|||||||
throw new \Exception("Target URL이 없습니다.");
|
throw new \Exception("Target URL이 없습니다.");
|
||||||
}
|
}
|
||||||
$this->main_process($max_limit, $listInfos);
|
$this->main_process($max_limit, $listInfos);
|
||||||
|
}
|
||||||
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
log_message("warning", sprintf(
|
log_message("warning", sprintf(
|
||||||
|
|||||||
@ -33,28 +33,45 @@ abstract class MyCrawler extends CommonLibrary
|
|||||||
return $crawler->filter($tag);
|
return $crawler->filter($tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function changeURLByMediaType(string $url): string
|
protected function changeURLByCrawler(string $url): string
|
||||||
{
|
{
|
||||||
return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : null;
|
return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : null;
|
||||||
}
|
}
|
||||||
protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
|
protected function getUrlByMediaType(Crawler $node, string $media_type, string $attr): null|string
|
||||||
{
|
{
|
||||||
$urls[$media_type] = [];
|
|
||||||
$selector->filter($options["tag"])->each(
|
|
||||||
function (Crawler $node) use (&$media_type, &$options, &$urls): void {
|
|
||||||
$url = $node->attr($options["attr"]);
|
|
||||||
switch ($media_type) {
|
switch ($media_type) {
|
||||||
case 'video':
|
case 'video':
|
||||||
if ($url === null) {
|
try {
|
||||||
$url = $node->children()->attr("src");
|
$url = $node->attr($attr); //<video src="test.mp4"></video> 또는 <video data-src="test.mp4"></video>
|
||||||
|
} catch (\Exception) {
|
||||||
|
$url = $node->children()->attr("src"); //<video><source src="test.mp4"></source</video>
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'img':
|
||||||
|
default:
|
||||||
|
$url = $node->attr($attr);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
return $url;
|
||||||
|
}
|
||||||
|
protected function getUrlsByMediaType(Crawler $selector, string $media_type, string $attr, array $urls = []): array
|
||||||
|
{
|
||||||
|
log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_type} 작업시작--------");
|
||||||
|
$urls[$media_type] = [];
|
||||||
|
$selector->filter($media_type)->each(
|
||||||
|
function (Crawler $node) use (&$media_type, &$attr, &$urls): void {
|
||||||
|
$url = $this->getUrlByMediaType($node, $media_type, $attr);
|
||||||
if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) {
|
if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) {
|
||||||
$urls[$media_type][] = $this->changeURLByMediaType($matches[0]);
|
$urls[$media_type][] = $this->changeURLByCrawler($matches[0]);
|
||||||
} else {
|
} else {
|
||||||
log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]\n");
|
log_message("debug", __FUNCTION__ . "-> {$media_type}:{$attr}\n");
|
||||||
log_message("debug", $node->html());
|
//Node 모든 속성은 DOMElement 변환 후 반환가능
|
||||||
|
$domNode = $node->getNode(0);
|
||||||
|
if ($domNode->hasAttributes()) {
|
||||||
|
foreach ($domNode->attributes as $attr) {
|
||||||
|
log_message("debug", "{$attr->nodeName} = {$attr->nodeValue}");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
@ -130,7 +147,7 @@ abstract class MyCrawler extends CommonLibrary
|
|||||||
$max_limit = count($listInfos);
|
$max_limit = count($listInfos);
|
||||||
}
|
}
|
||||||
$total = count($listInfos);
|
$total = count($listInfos);
|
||||||
$i = $this->getDebug() ? $max_limit : 1;
|
$i = 1;
|
||||||
foreach ($listInfos as $listInfo) {
|
foreach ($listInfos as $listInfo) {
|
||||||
if ($i <= $max_limit) {
|
if ($i <= $max_limit) {
|
||||||
log_message("notice", __FUNCTION__ . " 게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작");
|
log_message("notice", __FUNCTION__ . " 게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작");
|
||||||
|
|||||||
@ -66,17 +66,18 @@ class WebSocket extends MySocket
|
|||||||
//기본 Option
|
//기본 Option
|
||||||
$options['cookies'] = $this->getCookieJar(); //쿠키값
|
$options['cookies'] = $this->getCookieJar(); //쿠키값
|
||||||
$options['timeout'] = getenv("socket.web.timeout"); // 5초 안에 응답이 없으면 타임아웃
|
$options['timeout'] = getenv("socket.web.timeout"); // 5초 안에 응답이 없으면 타임아웃
|
||||||
log_message("debug", "Socket URL-> " . $url);
|
log_message("debug", __FUNCTION__ . "=> 호출 Socket URL-> " . $url);
|
||||||
return $this->getClient()->$method($url, $options);
|
return $this->getClient()->$method($url, $options);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getContent(string $url, $method = "get", array $options = []): string
|
public function getContent(string $url, $method = "get", array $options = []): string
|
||||||
{
|
{
|
||||||
|
log_message("debug", __FUNCTION__ . "=> 호출 URL:" . $url);
|
||||||
$response = $this->getResponse($url, $method, $options);
|
$response = $this->getResponse($url, $method, $options);
|
||||||
if ($response->getStatusCode() == 200) {
|
if ($response->getStatusCode() == 200) {
|
||||||
// return $response->getBody()->getContents();
|
// return $response->getBody()->getContents();
|
||||||
return $response->getBody();
|
return $response->getBody();
|
||||||
}
|
}
|
||||||
throw new \Exception("error", "{$url} 접속실패: " . $response->getStatusCode());
|
throw new \Exception("error", __FUNCTION__ . "=> {$url} 접속실패: " . $response->getStatusCode());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -13,7 +13,7 @@ trait FileTrait
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final protected function isFileType_FileTrait(string $file_ext, $type = "image"): bool
|
final protected function isFileType_FileTrait(string $file_ext, $type = "img"): bool
|
||||||
{
|
{
|
||||||
switch ($type) {
|
switch ($type) {
|
||||||
case "audio":
|
case "audio":
|
||||||
@ -22,7 +22,7 @@ trait FileTrait
|
|||||||
case "video":
|
case "video":
|
||||||
$exts = ['mov', 'avi', 'mp4'];
|
$exts = ['mov', 'avi', 'mp4'];
|
||||||
break;
|
break;
|
||||||
case "image":
|
case "img":
|
||||||
default:
|
default:
|
||||||
$exts = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
|
$exts = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
|
||||||
break;
|
break;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user