Automation init...3

This commit is contained in:
최준흠 2024-09-18 19:00:16 +09:00
parent 80b3e4bbc0
commit 073f80c46a
6 changed files with 113 additions and 47 deletions

View File

@ -55,6 +55,7 @@ class CrawlerController extends CommonController
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamapCrawler(getenv('yamap.host.url'), $board_name, $user_entity);
$crawler->isDebug = in_array('debug', $params);
$crawler->isCopy = in_array('copy', $params);
$crawler->execute(intval(getenv("yamap.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
@ -70,6 +71,7 @@ class CrawlerController extends CommonController
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $board_name, $user_entity);
$crawler->isDebug = in_array('debug', $params);
$crawler->isCopy = in_array('copy', $params);
$crawler->execute(intval(getenv("yamoon.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
@ -85,6 +87,7 @@ class CrawlerController extends CommonController
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new SirCrawler(getenv("sir.host.url"), $board_name, $user_entity);
$crawler->isDebug = in_array('debug', $params);
$crawler->isCopy = in_array('copy', $params);
$crawler->execute(intval(getenv("sir.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {
@ -100,6 +103,7 @@ class CrawlerController extends CommonController
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
$crawler = new InvenCrawler(getenv("inven.host.url"), $board_name, $user_entity);
$crawler->isDebug = in_array('debug', $params);
$crawler->isCopy = in_array('copy', $params);
$crawler->execute(intval(getenv("inven.list.max_limit")));
return "완료되었습니다.";
} catch (\Exception $e) {

View File

@ -48,28 +48,48 @@ class InvenCrawler extends MangboardCrawler
// </div>
/**
 * Handles one Inven post: fetches its detail page and registers it on the board.
 *
 * NOTE(review): this listing is a diff rendering whose +/- markers were lost, so
 * removed and added lines are interleaved below and the braces do not balance as
 * shown. Going by the hunk header (-48,28 +48,48), the pre-change flow fetched the
 * page, collected media URLs and branched on isDebug directly; the post-change
 * flow wraps that logic in the else-branch of a new isCopy check. Confirm against
 * the committed file before relying on line-level detail.
 *
 * Post-change flow as far as it is visible here:
 *  - fetch $listInfo['detail_url'] through getMySocket() and narrow the response
 *    with the selector named by the "inven.view.content.tag" environment value;
 *  - when isCopy is set, call createByCrawler() on the board model with an empty
 *    storage list and form data holding the selected HTML as 'content' and an
 *    empty 'image_path';
 *  - otherwise gather the "img" and "video" src URLs; with isDebug set, throw an
 *    exception carrying a dump of $listInfo and those URLs; without it, run
 *    media_process(), throw if it yields nothing, and pass the result to
 *    backend_process().
 *
 * @param int   $cnt      forwarded to createByCrawler() / backend_process()
 * @param array $listInfo list entry; 'detail_url' is read here
 * @return array the $listInfo argument, returned as received
 * @throws \Exception in debug mode, or when media_process() returns no storages
 */
protected function detail_process(int $cnt, array $listInfo): array
{
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
$selector = $this->getSelector($response, getenv("inven.view.content.tag"));
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->isDebug) {
throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
__FUNCTION__,
var_export($listInfo, true),
var_export($media_urls, true)
));
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
$selector = $this->getSelector($response, getenv("inven.view.content.tag"));
if ($this->isCopy) {
$formDatas = [];
$formDatas['image_path'] = "";
$formDatas['content'] = $selector->html();
// Register the post in the file DB and the board DB.
$this->getBoardModel()->createByCrawler(
$this->getBoardsEntity(),
$this->getUserEntity(),
$cnt,
$listInfo,
[],
$formDatas
);
} else {
// Take the image/video source URLs and actually download them.
$storages = $this->media_process($media_urls);
if (!count($storages)) {
throw new \Exception("등록할 자료가 없습니다.");
$media_urls = $this->getUrlsByMediaType($selector, "img", "src");
$media_urls = $this->getUrlsByMediaType($selector, "video", "src", $media_urls);
if ($this->isDebug) {
throw new \Exception(sprintf(
"\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
__FUNCTION__,
var_export($listInfo, true),
var_export($media_urls, true)
));
} else {
// Take the image/video source URLs and actually download them.
$storages = $this->media_process($media_urls);
if (!count($storages)) {
throw new \Exception("등록할 자료가 없습니다.");
}
$this->backend_process($cnt, $listInfo, $storages);
}
$this->backend_process($cnt, $listInfo, $storages);
}
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
return $listInfo;
}
/**
 * Fetches the detail page of one post and returns the list entry as received.
 *
 * NOTE(review): the fetched response is assigned but never read, and no caller
 * of this method is visible in this view (detail_process() handles isCopy
 * inline). This looks like a placeholder - confirm whether it is still needed.
 *
 * @param int   $cnt      not used by the current body
 * @param array $listInfo list entry; 'detail_url' is read here
 * @return array the $listInfo argument, returned as received
 */
protected function copy_process(int $cnt, array $listInfo): array
{
// Result is currently discarded; only the request itself takes place.
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
return $listInfo;
}
//리스트내용
// <div class="board-list">
// <table>
@ -103,12 +123,17 @@ class InvenCrawler extends MangboardCrawler
{
try {
if ($this->isDebug) {
$url = getenv("inven.view.test.url.{$this->_board_name}");
$this->detail_process(1, ['detail_url' => $url]);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
$listInfo = [];
$listInfo['title'] = 'test_title';
$listInfo['nickname'] = 'test_name';
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("inven.view.test.url.{$this->getBoardName()}");
$this->detail_process(1, $listInfo);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
} else {
$listInfos = [];
$response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->_board_name}"));
$response = $this->getMySocket()->getContent(getenv("inven.list.url.{$this->getBoardName()}"));
$this->getSelector($response, getenv("inven.list.tag"))->each(
function (Crawler $node) use (&$listInfos): void {
$hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();

View File

@ -2,9 +2,10 @@
namespace App\Libraries\MyCrawler\Mangboard;
use App\Libraries\MySocket\WebSocket;
use App\Entities\Mangboard\BoardsEntity;
use App\Entities\Mangboard\UserEntity;
use App\Libraries\MyCrawler\MyCrawler;
use App\Libraries\MySocket\WebSocket;
use App\Libraries\MyStorage\MangboardStorage;
use App\Models\Mangboard\BoardModel;
use App\Models\Mangboard\BoardsModel;
@ -13,13 +14,15 @@ abstract class MangboardCrawler extends MyCrawler
{
private $_mySocket = null;
private $_host = "";
protected $_board_name = "";
private $_board_name = "";
private $_board_model = null;
private $_boards_entity = null;
private $_user_entity = null;
/**
 * Records the target host, board name and acting user on the crawler.
 *
 * The board name was previously assigned twice in a row with the same value;
 * the repeated statement has been dropped.
 *
 * @param string     $host        host value kept in $_host
 * @param string     $board_name  board name later exposed through getBoardName()
 * @param UserEntity $user_entity user later exposed through getUserEntity()
 */
protected function __construct(string $host, string $board_name, UserEntity $user_entity)
{
    parent::__construct();
    $this->_host = $host;
    $this->_board_name = $board_name;
    $this->_user_entity = $user_entity;
}
abstract protected function detail_process(int $cnt, array $listInfo): array;
@ -33,28 +36,47 @@ abstract class MangboardCrawler extends MyCrawler
}
/**
 * Builds a storage object bound to this crawler's board and user.
 *
 * Two consecutive return statements used to appear here, leaving the second
 * unreachable. Only the accessor-based form is kept; getBoardName() and
 * getUserEntity() return the same private fields the direct form read.
 *
 * @return MangboardStorage a new instance on every call
 */
final protected function createMyStorage()
{
    return new MangboardStorage($this->getBoardName(), $this->getUserEntity());
}
/**
 * Returns the BoardModel for this crawler's board, creating it on first use.
 *
 * The model is constructed with "mb_" followed by the board name and then
 * cached on the instance, so later calls return the same object.
 *
 * @return BoardModel the cached model instance
 */
final protected function getBoardModel(): BoardModel
{
    if ($this->_board_model !== null) {
        return $this->_board_model;
    }
    $tableName = "mb_" . $this->getBoardName();
    $this->_board_model = new BoardModel($tableName);
    return $this->_board_model;
}
/**
 * Accessor for the board name stored on this crawler.
 *
 * @return string the value held in $_board_name
 */
final protected function getBoardName(): string
{
    return $this->_board_name;
}
/**
 * Returns the BoardsEntity for this crawler's board, looking it up on first use.
 *
 * The lookup goes through BoardsModel::getEntityByID() with the board name and
 * the result is cached on the instance.
 *
 * @return BoardsEntity the cached entity
 * @throws \Exception when the lookup returns null for the board name
 */
final protected function getBoardsEntity(): BoardsEntity
{
    if ($this->_boards_entity !== null) {
        return $this->_boards_entity;
    }
    $model = new BoardsModel();
    $this->_boards_entity = $model->getEntityByID($this->getBoardName());
    if ($this->_boards_entity === null) {
        throw new \Exception(__FUNCTION__ . "=> {$this->getBoardName()}에 해당 Board 정보가 존재하지 않습니다.");
    }
    return $this->_boards_entity;
}
/**
 * Accessor for the user entity stored on this crawler.
 *
 * @return UserEntity the value held in $_user_entity
 */
final protected function getUserEntity(): UserEntity
{
    return $this->_user_entity;
}
protected function backend_process(int $cnt, array $listInfo, array $storages)
{
//File DB 및 Board DB 등록작업등
$baord_name = $this->_board_name;
$boardsModel = new BoardsModel();
$boards_entity = $boardsModel->getEntityByID($this->_board_name);
if ($boards_entity === null) {
throw new \Exception(__FUNCTION__ . "=>{$this->_board_name}에 해당 Board 정보가 존재하지 않습니다.");
}
$boardModel = new BoardModel("mb_" . $baord_name);
$board_entity = $boardModel->createByCrawler(
$boards_entity,
$this->_user_entity,
$board_entity = $this->getBoardModel()->createByCrawler(
$this->getBoardsEntity(),
$this->getUserEntity(),
$cnt,
$listInfo,
$storages
);
foreach ($storages as $storage) {
try {
$storage->backend_process($boards_entity, $board_entity, $boardModel->getTable());
$storage->backend_process($this->getBoardsEntity(), $board_entity, $this->getBoardModel()->getTable());
} catch (\Exception $e) {
log_message("notice", sprintf(
"\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",

View File

@ -143,12 +143,17 @@ class SirCrawler extends MangboardCrawler
{
try {
if ($this->isDebug) {
$url = getenv("sir.view.test.url.{$this->_board_name}");
$this->detail_process(1, ['detail_url' => $url]);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$url} 작업종료");
$listInfo = [];
$listInfo['title'] = 'test_title';
$listInfo['nickname'] = 'test_name';
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("sir.view.test.url.{$this->getBoardName()}");
$this->detail_process(1, $listInfo);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
} else {
$listInfos = [];
$response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->_board_name}"));
$response = $this->getMySocket()->getContent(getenv("sir.list.url.{$this->getBoardName()}"));
$this->getSelector($response, getenv("sir.list.tag"))->each(
function (Crawler $node) use (&$listInfos): void {
$link_node = $node->filter(getenv("sir.list.item.link.tag"));

View File

@ -69,12 +69,17 @@ class YamapCrawler extends MangboardCrawler
{
try {
if ($this->isDebug) {
$url = getenv("yamap.view.test.url.{$this->_board_name}");
$this->detail_process(1, ['detail_url' => $url]);
log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료");
$listInfo = [];
$listInfo['title'] = 'test_title';
$listInfo['nickname'] = 'test_name';
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("yamap.view.test.url.{$this->getBoardName()}");
$this->detail_process(1, $listInfo);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
} else {
$listInfos = [];
$response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->_board_name}"));
$response = $this->getMySocket()->getContent(getenv("yamap.list.url.{$this->getBoardName()}"));
$selector = $this->getSelector($response, getenv("yamap.list.tag"));
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
$selector->filter(getenv("yamap.list.item.tag"))->each(

View File

@ -91,12 +91,17 @@ class YamoonCrawler extends MangboardCrawler
{
try {
if ($this->isDebug) {
$url = getenv("yamoon.view.test.url.{$this->_board_name}");
$this->detail_process(1, ['detail_url' => $url]);
log_message("notice", __FUNCTION__ . "DEBUG 게시물 {$url} 작업종료");
$listInfo = [];
$listInfo['title'] = 'test_title';
$listInfo['nickname'] = 'test_name';
$listInfo['hit'] = 1;
$listInfo['date'] = date("Y-m-d H:i:s");
$listInfo['detail_url'] = getenv("yamoon.view.test.url.{$this->getBoardName()}");
$this->detail_process(1, $listInfo);
log_message("notice", __FUNCTION__ . "=> DEBUG 게시물 {$listInfo['detail_url']} 작업종료");
} else {
$listInfos = [];
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->_board_name}"));
$response = $this->getMySocket()->getContent(getenv("yamoon.list.url.{$this->getBoardName()}"));
$this->getSelector($response, getenv("yamoon.list.tag"))->each(
function (Crawler $node) use (&$listInfos): void {
$link_node = $node->filter(getenv("yamoon.list.item.link.tag"));