Automation init...3

This commit is contained in:
최준흠 2024-09-18 19:00:16 +09:00
parent 80b3e4bbc0
commit 073f80c46a
6 changed files with 113 additions and 47 deletions

View File

@ -55,6 +55,7 @@ class CrawlerController extends CommonController
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamapCrawler(getenv('yamap.host.url'), $board_name, $user_entity); $crawler = new YamapCrawler(getenv('yamap.host.url'), $board_name, $user_entity);
$crawler->isDebug = in_array('debug', $params); $crawler->isDebug = in_array('debug', $params);
$crawler->isCopy = in_array('copy', $params);
$crawler->execute(intval(getenv("yamap.list.max_limit"))); $crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {
@ -70,6 +71,7 @@ class CrawlerController extends CommonController
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $board_name, $user_entity); $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $board_name, $user_entity);
$crawler->isDebug = in_array('debug', $params); $crawler->isDebug = in_array('debug', $params);
$crawler->isCopy = in_array('copy', $params);
$crawler->execute(intval(getenv("yamoon.list.max_limit"))); $crawler->execute(intval(getenv("yamoon.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {
@ -85,6 +87,7 @@ class CrawlerController extends CommonController
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new SirCrawler(getenv("sir.host.url"), $board_name, $user_entity); $crawler = new SirCrawler(getenv("sir.host.url"), $board_name, $user_entity);
$crawler->isDebug = in_array('debug', $params); $crawler->isDebug = in_array('debug', $params);
$crawler->isCopy = in_array('copy', $params);
$crawler->execute(intval(getenv("sir.list.max_limit"))); $crawler->execute(intval(getenv("sir.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {
@ -100,6 +103,7 @@ class CrawlerController extends CommonController
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달. //2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new InvenCrawler(getenv("inven.host.url"), $board_name, $user_entity); $crawler = new InvenCrawler(getenv("inven.host.url"), $board_name, $user_entity);
$crawler->isDebug = in_array('debug', $params); $crawler->isDebug = in_array('debug', $params);
$crawler->isCopy = in_array('copy', $params);
$crawler->execute(intval(getenv("inven.list.max_limit"))); $crawler->execute(intval(getenv("inven.list.max_limit")));
return "완료되었습니다."; return "완료되었습니다.";
} catch (\Exception $e) { } catch (\Exception $e) {

View File

@ -48,28 +48,48 @@ class InvenCrawler extends MangboardCrawler
// </div> // </div>
protected function detail_process(int $cnt, array $listInfo): array protected function detail_process(int $cnt, array $listInfo): array
{ {
$response = $this->getMySocket()->getContent($listInfo['detail_url']); $response = $this->getMySocket()->getContent($listInfo['detail_url']);
$selector = $this->getSelector($response, getenv("inven.view.content.tag")); $selector = $this->getSelector($response, getenv("inven.view.content.tag"));
$media_urls = $this->getUrlsByMediaType($selector, "img", "src"); if ($this->isCopy) {
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls); $formDatas = [];
if ($this->isDebug) { $formDatas['image_path'] = "";
throw new \Exception(sprintf( $formDatas['content'] = $selector->html();
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n", //File DB 및 Board DB 등록작업등
__FUNCTION__, $this->getBoardModel()->createByCrawler(
var_export($listInfo, true), $this->getBoardsEntity(),
var_export($media_urls, true) $this->getUserEntity(),
)); $cnt,
$listInfo,
[],
$formDatas
);
} else { } else {
// Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리 $media_urls = $this->getUrlsByMediaType($selector, "img", "src");
$storages = $this->media_process($media_urls); $media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if (!count($storages)) { if ($this->isDebug) {
throw new \Exception("등록할 자료가 없습니다."); throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
__FUNCTION__,
var_export($listInfo, true),
var_export($media_urls, true)
));
} else {
// Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
$storages = $this->media_process($media_urls);
if (!count($storages)) {
throw new \Exception("등록할 자료가 없습니다.");
}
$this->backend_process($cnt, $listInfo, $storages);
} }
$this->backend_process($cnt, $listInfo, $storages);
} }
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다."); log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
return $listInfo; return $listInfo;
} }
/**
 * Copy-mode handler for a single list entry.
 *
 * Requests the page at $listInfo['detail_url'] through the crawler's socket
 * and hands $listInfo back unchanged.
 *
 * NOTE(review): the fetched body is never read and $cnt is not used, so this
 * looks like a placeholder awaiting a real implementation — confirm.
 *
 * @param int   $cnt      Not used by this method.
 * @param array $listInfo Must contain a 'detail_url' entry.
 * @return array The $listInfo argument, unmodified.
 */
protected function copy_process(int $cnt, array $listInfo): array
{
    $socket = $this->getMySocket();
    // Result is intentionally kept in a local only; nothing consumes it yet.
    $detailBody = $socket->getContent($listInfo['detail_url']);

    return $listInfo;
}
//리스트내용 //리스트내용
// <div class="board-list"> // <div class="board-list">
// <table> // <table>
@ -103,12 +123,17 @@ class InvenCrawler extends MangboardCrawler
{ {
try { try {
if ($this->isDebug) { if ($this->isDebug) {
$url = getenv("inven.view.test.url.{$this->_board_name}"); $listInfo = [];
$this->detail_process(1, ['detail_url' => $url]); $listInfo['title'] = 'test_title';
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료"); $listInfo['nickname'] = 'test_name';
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("inven.view.test.url.{$this->getBoardName()}");
$this->detail_process(1, $listInfo);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
} else { } else {
$listInfos = []; $listInfos = [];
$response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}")); $response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->getBoardName()}"));
$this->getSelector($response, getenv("inven.list.tag"))->each( $this->getSelector($response, getenv("inven.list.tag"))->each(
function (Crawler $node) use (&$listInfos): void { function (Crawler $node) use (&$listInfos): void {
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text(); $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();

View File

@ -2,9 +2,10 @@
namespace App\Libraries\MyCrawler\Mangboard; namespace App\Libraries\MyCrawler\Mangboard;
use App\Libraries\MySocket\WebSocket; use App\Entities\Mangboard\BoardsEntity;
use App\Entities\Mangboard\UserEntity; use App\Entities\Mangboard\UserEntity;
use App\Libraries\MyCrawler\MyCrawler; use App\Libraries\MyCrawler\MyCrawler;
use App\Libraries\MySocket\WebSocket;
use App\Libraries\MyStorage\MangboardStorage; use App\Libraries\MyStorage\MangboardStorage;
use App\Models\Mangboard\BoardModel; use App\Models\Mangboard\BoardModel;
use App\Models\Mangboard\BoardsModel; use App\Models\Mangboard\BoardsModel;
@ -13,13 +14,15 @@ abstract class MangboardCrawler extends MyCrawler
{ {
private $_mySocket = null; private $_mySocket = null;
private $_host = ""; private $_host = "";
protected $_board_name = ""; private $_board_name = "";
private $_board_model = null;
private $_boards_entity = null;
private $_user_entity = null; private $_user_entity = null;
protected function __construct(string $host, string $board_name, UserEntity $user_entity) protected function __construct(string $host, string $board_name, UserEntity $user_entity)
{ {
parent::__construct(); parent::__construct();
$this->_host = $host; $this->_host = $host;
$this->_board_name = $board_name; $this->_board_name = $board_name;
$this->_user_entity = $user_entity; $this->_user_entity = $user_entity;
} }
abstract protected function detail_process(int $cnt, array $listInfo): array; abstract protected function detail_process(int $cnt, array $listInfo): array;
@ -33,28 +36,47 @@ abstract class MangboardCrawler extends MyCrawler
} }
final protected function createMyStorage() final protected function createMyStorage()
{ {
return new MangboardStorage($this->_board_name, $this->_user_entity); return new MangboardStorage($this->getBoardName(), $this->getUserEntity());
}
/**
 * Returns the BoardModel for this crawler's board, creating it on first use.
 *
 * The instance is cached in $_board_model, so later calls return the same
 * object. The model is constructed with "mb_" prepended to the board name.
 *
 * @return BoardModel
 */
final protected function getBoardModel(): BoardModel
{
    // Already built on an earlier call: reuse it.
    if ($this->_board_model !== null) {
        return $this->_board_model;
    }

    $prefixedName = "mb_" . $this->getBoardName();
    $this->_board_model = new BoardModel($prefixedName);

    return $this->_board_model;
}
/**
 * Returns the board name held in $_board_name.
 *
 * @return string
 */
final protected function getBoardName(): string
{
return $this->_board_name;
}
/**
 * Returns the BoardsEntity for this crawler's board, loading it on first use.
 *
 * The entity is looked up through BoardsModel::getEntityByID() using the
 * board name and cached in $_boards_entity. A failed lookup leaves the cache
 * empty, so the next call tries again.
 *
 * @return BoardsEntity
 * @throws \Exception When the lookup returns null for the board name.
 */
final protected function getBoardsEntity(): BoardsEntity
{
    // Cached from an earlier successful lookup.
    if ($this->_boards_entity !== null) {
        return $this->_boards_entity;
    }

    $boardsModel = new BoardsModel();
    $found = $boardsModel->getEntityByID($this->getBoardName());
    if ($found === null) {
        throw new \Exception(__FUNCTION__ . "=> {$this->getBoardName()}에 해당 Board 정보가 존재하지 않습니다.");
    }

    $this->_boards_entity = $found;

    return $this->_boards_entity;
}
/**
 * Returns the user entity held in $_user_entity.
 *
 * @return UserEntity
 */
final protected function getUserEntity(): UserEntity
{
return $this->_user_entity;
} }
protected function backend_process(int $cnt, array $listInfo, array $storages) protected function backend_process(int $cnt, array $listInfo, array $storages)
{ {
//File DB 및 Board DB 등록작업등 //File DB 및 Board DB 등록작업등
$baord_name = $this->_board_name; $board_entity = $this->getBoardModel()->createByCrawler(
$boardsModel = new BoardsModel(); $this->getBoardsEntity(),
$boards_entity = $boardsModel->getEntityByID($this->_board_name); $this->getUserEntity(),
if ($boards_entity === null) {
throw new \Exception(__FUNCTION__ . "=>{$this->_board_name}에 해당 Board 정보가 존재하지 않습니다.");
}
$boardModel = new BoardModel("mb_" . $baord_name);
$board_entity = $boardModel->createByCrawler(
$boards_entity,
$this->_user_entity,
$cnt, $cnt,
$listInfo, $listInfo,
$storages $storages
); );
foreach ($storages as $storage) { foreach ($storages as $storage) {
try { try {
$storage->backend_process($boards_entity, $board_entity, $boardModel->getTable()); $storage->backend_process($this->getBoardsEntity(), $board_entity, $this->getBoardModel()->getTable());
} catch (\Exception $e) { } catch (\Exception $e) {
log_message("notice", sprintf( log_message("notice", sprintf(
"\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n", "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",

View File

@ -143,12 +143,17 @@ class SirCrawler extends MangboardCrawler
{ {
try { try {
if ($this->isDebug) { if ($this->isDebug) {
$url = getenv("sir.view.test.url.{$this->_board_name}"); $listInfo = [];
$this->detail_process(1, ['detail_url' => $url]); $listInfo['title'] = 'test_title';
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료"); $listInfo['nickname'] = 'test_name';
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("sir.view.test.url.{$this->getBoardName()}");
$this->detail_process(1, $listInfo);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
} else { } else {
$listInfos = []; $listInfos = [];
$response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->_board_name}")); $response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->getBoardName()}"));
$this->getSelector($response, getenv("sir.list.tag"))->each( $this->getSelector($response, getenv("sir.list.tag"))->each(
function (Crawler $node) use (&$listInfos): void { function (Crawler $node) use (&$listInfos): void {
$link_node = $node->filter(getenv("sir.list.item.link.tag")); $link_node = $node->filter(getenv("sir.list.item.link.tag"));

View File

@ -69,12 +69,17 @@ class YamapCrawler extends MangboardCrawler
{ {
try { try {
if ($this->isDebug) { if ($this->isDebug) {
$url = getenv("yamap.view.test.url.{$this->_board_name}"); $listInfo = [];
$this->detail_process(1, ['detail_url' => $url]); $listInfo['title'] = 'test_title';
log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료"); $listInfo['nickname'] = 'test_name';
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("yamap.view.test.url.{$this->getBoardName()}");
$this->detail_process(1, $listInfo);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
} else { } else {
$listInfos = []; $listInfos = [];
$response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_board_name}")); $response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->getBoardName()}"));
$selector = $this->getSelector($response, getenv("yamap.list.tag")); $selector = $this->getSelector($response, getenv("yamap.list.tag"));
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
$selector->filter(getenv("yamap.list.item.tag"))->each( $selector->filter(getenv("yamap.list.item.tag"))->each(

View File

@ -91,12 +91,17 @@ class YamoonCrawler extends MangboardCrawler
{ {
try { try {
if ($this->isDebug) { if ($this->isDebug) {
$url = getenv("yamoon.view.test.url.{$this->_board_name}"); $listInfo = [];
$this->detail_process(1, ['detail_url' => $url]); $listInfo['title'] = 'test_title';
log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료"); $listInfo['nickname'] = 'test_name';
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("yamoon.view.test.url.{$this->getBoardName()}");
$this->detail_process(1, $listInfo);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
} else { } else {
$listInfos = []; $listInfos = [];
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_board_name}")); $response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->getBoardName()}"));
$this->getSelector($response, getenv("yamoon.list.tag"))->each( $this->getSelector($response, getenv("yamoon.list.tag"))->each(
function (Crawler $node) use (&$listInfos): void { function (Crawler $node) use (&$listInfos): void {
$link_node = $node->filter(getenv("yamoon.list.item.link.tag")); $link_node = $node->filter(getenv("yamoon.list.item.link.tag"));