Automation init...3
This commit is contained in:
parent
befbaafae1
commit
471dbda929
@ -33,12 +33,11 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
|
||||
$routes->group('crawler', function ($routes) {
|
||||
$routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1');
|
||||
$routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2');
|
||||
$routes->cli('yamap/(:alpha)/(:any)/(:any)', 'CrawlerController::yamap/$1/$2/$3');
|
||||
$routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1');
|
||||
$routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2');
|
||||
$routes->cli('yamoon/(:alpha)/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2/$3');
|
||||
$routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1');
|
||||
$routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2');
|
||||
$routes->cli('sir/(:alpha)/(:any)/(:any)', 'CrawlerController::sir/$1/$2/$3');
|
||||
$routes->cli('inven/(:alpha)', 'CrawlerController::inven/$1');
|
||||
$routes->cli('inven/(:alpha)/(:any)', 'CrawlerController::inven/$1/$2');
|
||||
});
|
||||
});
|
||||
|
||||
@ -4,9 +4,10 @@ namespace App\Controllers\Mangboard;
|
||||
|
||||
use App\Controllers\CommonController;
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use App\Libraries\MyCrawler\Mangboard\InvenCrawler;
|
||||
use App\Libraries\MyCrawler\Mangboard\SirCrawler;
|
||||
use App\Libraries\MyCrawler\Mangboard\YamapCrawler;
|
||||
use App\Libraries\MyCrawler\Mangboard\YamoonCrawler;
|
||||
use App\Libraries\MyCrawler\Mangboard\SirCrawler;
|
||||
use App\Models\Mangboard\UserModel;
|
||||
|
||||
class CrawlerController extends CommonController
|
||||
@ -46,13 +47,13 @@ class CrawlerController extends CommonController
|
||||
log_message("notice", "{$id}로 로그인 성공");
|
||||
return $user_entity;
|
||||
}
|
||||
public function yamap(string $category, string $id = "", string $option = ""): string
|
||||
public function yamap(string $id = "", string $option = ""): string
|
||||
{
|
||||
try {
|
||||
//1. 사이트 로그인 처리
|
||||
$user_entity = $this->login($id);
|
||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||
$crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity);
|
||||
$crawler = new YamapCrawler(getenv('yamap.host.url'), getenv("yamap.host.board_name"), $user_entity);
|
||||
if ($option) {
|
||||
$crawler->setDebug($option === "debug" ? true : false);
|
||||
}
|
||||
@ -63,13 +64,13 @@ class CrawlerController extends CommonController
|
||||
return $e->getMessage();
|
||||
}
|
||||
}
|
||||
public function yamoon(string $category, string $id = "", string $option = ""): string
|
||||
public function yamoon(string $id = "", string $option = ""): string
|
||||
{
|
||||
try {
|
||||
//1. 사이트 로그인 처리
|
||||
$user_entity = $this->login($id);
|
||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity);
|
||||
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), getenv("yamoon.host.board_name"), $user_entity);
|
||||
if ($option) {
|
||||
$crawler->setDebug($option === "debug" ? true : false);
|
||||
}
|
||||
@ -80,13 +81,13 @@ class CrawlerController extends CommonController
|
||||
return $e->getMessage();
|
||||
}
|
||||
}
|
||||
public function sir(string $category, string $id = "", string $option = ""): string
|
||||
public function sir(string $id = "", string $option = ""): string
|
||||
{
|
||||
try {
|
||||
//1. 사이트 로그인 처리
|
||||
$user_entity = $this->login($id);
|
||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||
$crawler = new SirCrawler(getenv("sir.host.url"), $category, $user_entity);
|
||||
$crawler = new SirCrawler(getenv("sir.host.url"), getenv("sir.host.board_name"), $user_entity);
|
||||
if ($option) {
|
||||
$crawler->setDebug($option === "debug" ? true : false);
|
||||
}
|
||||
@ -97,4 +98,21 @@ class CrawlerController extends CommonController
|
||||
return $e->getMessage();
|
||||
}
|
||||
}
|
||||
/**
 * CLI entry point that runs the Inven crawler.
 *
 * Host URL, board name and the list limit are all read from environment
 * settings under the "inven." prefix.
 *
 * @param string $id     Account id handed to login().
 * @param string $option Pass "debug" to switch the crawler into debug mode;
 *                       any other non-empty value explicitly disables it.
 *
 * @return string A completion message, or the exception message when
 *                login or crawler setup fails.
 */
public function inven(string $id = "", string $option = ""): string
{
    try {
        // 1. Log in to the site.
        $user_entity = $this->login($id);
        // 2. Hand the logged-in user plus host/board settings to the crawler.
        $crawler = new InvenCrawler(getenv("inven.host.url"), getenv("inven.host.board_name"), $user_entity);
        if ($option) {
            $crawler->setDebug($option === "debug");
        }
        // The key previously read "iven.list.max_limit", which matches none of
        // the other "inven.*" settings; getenv() then returned false and the
        // crawler was always started with a limit of 0.
        $crawler->execute(intval(getenv("inven.list.max_limit")));
        return "완료되었습니다.";
    } catch (\Exception $e) {
        log_message("error", $e->getMessage());
        return $e->getMessage();
    }
}
|
||||
}
|
||||
|
||||
118
app/Libraries/MyCrawler/Mangboard/InvenCrawler.php
Normal file
118
app/Libraries/MyCrawler/Mangboard/InvenCrawler.php
Normal file
@ -0,0 +1,118 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler\Mangboard;
|
||||
|
||||
use App\Entities\Mangboard\UserEntity;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Crawler for an Inven (inven.co.kr) board.
 *
 * All CSS selectors and URLs are read from environment settings under the
 * "inven." prefix, so the markup samples in the comments below are only
 * illustrations of what the selectors are expected to match.
 */
class InvenCrawler extends MangboardCrawler
{
    /**
     * Forwards all arguments unchanged to the parent crawler.
     *
     * @param string     $host        Base URL of the target site.
     * @param string     $board_name  Name of the board the results are stored in.
     * @param UserEntity $user_entity Logged-in user the crawl runs as.
     */
    public function __construct(string $host, string $board_name, UserEntity $user_entity)
    {
        parent::__construct($host, $board_name, $user_entity);
    }

    // Sample markup of a post body:
    // <div class="articleContent">
    //     <div id="imageCollectDiv" class="contentBody">
    //         <!-- ============== CONTENT ============== -->
    //         <div id="powerbbsContent">
    //             <div id="BBSImageHolderTop" style="text-align:center;">
    //                 <img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1620925350.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1587803007.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1134295360.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1481352611.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 1080 / 1350;" loading="lazy" />
    //                 <br><br><img src="https://upload3.inven.co.kr/upload/2024/09/15/bbs/i1878651605.jpg?MW=800" style="max-width: 100%; width: 800px; aspect-ratio: 850 / 1063;" loading="lazy" />
    //                 <br><br>
    //             </div>
    //             <div>^^</div>
    //         </div>
    //         <!-- ============== End CONTENT ============== -->
    //     </div>

    /**
     * Fetches one post, collects its image/video URLs and registers them.
     *
     * @param int   $cnt      Sequence number passed through to backend_process().
     * @param array $listInfo List entry; only 'detail_url' is read here.
     *
     * @return array The unchanged $listInfo.
     *
     * @throws \Exception In debug mode (always, carrying a dump of the list
     *                    entry and the collected URLs), or when no media
     *                    could be stored.
     */
    protected function detail_page(int $cnt, array $listInfo): array
    {
        $response = $this->getMySocket()->getContent($listInfo['detail_url']);
        $tag = getenv("inven.view.content.tag");
        $selector = $this->getSelector($response, $tag);
        // Collect <img src> first, then append <video src> to the same result set.
        $media_urls = $this->getUrlsByMediaType("image", $selector, ["tag" => "img", "attr" => "src"]);
        $media_urls = $this->getUrlsByMediaType("video", $selector, ["tag" => "video", "attr" => "src"], $media_urls);

        if ($this->getDebug()) {
            // Debug mode never downloads or stores anything: it reports what
            // was found through the exception message instead.
            throw new \Exception(sprintf(
                "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
                __FUNCTION__,
                var_export($listInfo, true),
                var_export($media_urls, true)
            ));
        }

        // Download the collected image/video sources.
        $storages = $this->media_process($media_urls);
        if (!count($storages)) {
            throw new \Exception("등록할 자료가 없습니다.");
        }
        $this->backend_process($cnt, $listInfo, $storages);

        return $listInfo;
    }

    // Sample markup of the post list:
    // <div class="board-list">
    //     <table>
    //         <tr class="lgtm">
    //             <td class="num"><span>1589</span></td>
    //             <td class="tit">
    //                 <div class="text-wrap">
    //                     <div>
    //                         <span class="user-icon">
    //                             <img src="https://upload3.inven.co.kr/upload/2024/06/12/icon/i1237935053.jpg" alt="user icon" loading="lazy">
    //                         </span>
    //                         <a class="subject-link" href="https://www.inven.co.kr/board/party/5951/1589">
    //                             <span class="board_name">[category]</span>post title
    //                         </a>
    //                     </div>
    //                     <span data-opinion-bbs-comeidx="5951" data-opinion-bbs-uid="1589" data-opinion-bbs-opi="1" class="con-comment">[1]</span>
    //                     <span class="con-icon board-img photo">photo</span>
    //                 </div>
    //             </td>
    //             <td class="user">
    //                 <img src="https://static.inven.co.kr/image_2011/member/level/1202/lv32.gif" alt="level icon">
    //                 <span class="layerNickName" onclick="layerNickName('nickname', 'pbNickNameHandler'); ">nickname</span>
    //             </td>
    //             <td class="date">09-15</td>
    //             <td class="view">1,502</td>
    //             <td class="reco">1</td>
    //         </tr>
    //     </table>
    // </div>

    /**
     * Reads the board listing and processes the posts found in it.
     *
     * Any \Exception raised along the way is logged as a warning and not
     * rethrown, so callers are not notified of failures.
     *
     * @param int $max_limit Limit passed through to main_process().
     */
    public function execute(int $max_limit): void
    {
        try {
            $listInfos = [];

            if ($this->getDebug()) {
                // In debug mode only the configured test post is inspected;
                // detail_page() throws in debug mode, which ends the run here.
                $this->detail_page(1, ['detail_url' => getenv("inven.view.test.url")]);
            }

            $response = $this->getMySocket()->getContent(getenv("inven.list.url"));

            // Visit every node matching the configured list selector and
            // turn each one into a list entry.
            $this->getSelector($response, getenv("inven.list.tag"))->each(
                function (Crawler $node) use (&$listInfos): void {
                    $hit = $node->filter(getenv("inven.list.item.hit.tag"))->text();
                    // NOTE(review): the sample markup shows a month-day value
                    // ("09-15"), so the current year is prepended — confirm
                    // this holds for posts from a previous year.
                    $date = date("Y") . "-" . $node->filter(getenv("inven.list.item.date.tag"))->text();
                    $nickname = $node->filter(getenv("inven.list.item.nickname.tag"))->text();

                    $link_node = $node->filter(getenv("inven.list.item.link.tag"));
                    $detail_url = $link_node->attr("href");

                    // The title is taken from the link's last child element.
                    // A link without child elements used to make text() throw
                    // and abort the whole listing; fall back to the link's
                    // own text in that case.
                    // NOTE(review): with the sample markup above the last child
                    // element is span.board_name (the category label), not the
                    // title text — verify against the live page.
                    $link_children = $link_node->children();
                    $title = $link_children->count() > 0
                        ? $link_children->last()->text()
                        : $link_node->text();

                    $listInfos[] = ['title' => $title, 'nickname' => $nickname, 'detail_url' => $detail_url, 'date' => $date, 'hit' => $hit];
                }
            );

            if (!count($listInfos)) {
                throw new \Exception("Target URL이 없습니다.");
            }

            $this->main_process($max_limit, $listInfos);
            log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
    }
}
|
||||
@ -13,13 +13,13 @@ abstract class MangboardCrawler extends MyCrawler
|
||||
{
|
||||
protected $_mySocket = null;
|
||||
protected $_host = "";
|
||||
protected $_category = "";
|
||||
protected $_user_entity = null;
|
||||
protected function __construct(string $host, string $category, UserEntity $user_entity)
|
||||
private $_board_name = "";
|
||||
private $_user_entity = null;
|
||||
protected function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct();
|
||||
$this->_host = $host;
|
||||
$this->_category = $category;
|
||||
$this->_board_name = $board_name;
|
||||
$this->_user_entity = $user_entity;
|
||||
}
|
||||
protected function getMySocket()
|
||||
@ -31,14 +31,14 @@ abstract class MangboardCrawler extends MyCrawler
|
||||
}
|
||||
final protected function createMyStorage()
|
||||
{
|
||||
return new MangboardStorage($this->_category, $this->_user_entity);
|
||||
return new MangboardStorage($this->_board_name, $this->_user_entity);
|
||||
}
|
||||
protected function backend_process(int $cnt, array $listInfo, array $storages)
|
||||
{
|
||||
//File DB 및 Board DB 등록작업등
|
||||
$baord_name = "board_" . $this->_category;
|
||||
$baord_name = $this->_board_name;
|
||||
$boardsModel = new BoardsModel();
|
||||
$boards_entity = $boardsModel->getEntityByID("board_" . $this->_category);
|
||||
$boards_entity = $boardsModel->getEntityByID($this->_board_name);
|
||||
$boardModel = new BoardModel("mb_" . $baord_name);
|
||||
$board_entity = $boardModel->createByCrawler(
|
||||
$boards_entity,
|
||||
|
||||
@ -9,13 +9,13 @@ use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class SirCrawler extends MangboardCrawler
|
||||
{
|
||||
public function __construct(string $host, string $category, UserEntity $user_entity)
|
||||
public function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct($host, $category, $user_entity);
|
||||
parent::__construct($host, $board_name, $user_entity);
|
||||
}
|
||||
protected function getChangeURL(string $url): string
|
||||
protected function changeURLByMediaType(string $url): string
|
||||
{
|
||||
return str_replace("/sir.kr/", "", $url);
|
||||
return str_replace("/sir.kr/", "", parent::changeURLByMediaType($url));
|
||||
}
|
||||
//작성내용
|
||||
// <article class="sir_vbo ">
|
||||
@ -151,7 +151,7 @@ class SirCrawler extends MangboardCrawler
|
||||
function (Crawler $node) use (&$listInfos): void {
|
||||
$link_node = $node->filter(getenv("sir.list.item.link.tag"));
|
||||
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함
|
||||
$detail_url = $this->getChangeURL($link_node->attr("href"));
|
||||
$detail_url = str_replace("/sir.kr/", "", $link_node->attr("href"));
|
||||
// $detail_url = $link_node->attr("href");
|
||||
$title = $link_node->text();
|
||||
$nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
|
||||
|
||||
@ -7,9 +7,9 @@ use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class YamapCrawler extends MangboardCrawler
|
||||
{
|
||||
public function __construct(string $host, string $category, UserEntity $user_entity)
|
||||
public function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct($host, $category, $user_entity);
|
||||
parent::__construct($host, $board_name, $user_entity);
|
||||
}
|
||||
protected function detail_page(int $cnt, array $listInfo): array
|
||||
{
|
||||
|
||||
@ -7,9 +7,9 @@ use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
class YamoonCrawler extends MangboardCrawler
|
||||
{
|
||||
public function __construct(string $host, string $category, UserEntity $user_entity)
|
||||
public function __construct(string $host, string $board_name, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct($host, $category, $user_entity);
|
||||
parent::__construct($host, $board_name, $user_entity);
|
||||
}
|
||||
//작성내용
|
||||
// <div class="panel panel-default">
|
||||
|
||||
@ -16,14 +16,14 @@ abstract class MyCrawler extends CommonLibrary
|
||||
abstract protected function getMySocket();
|
||||
abstract protected function createMyStorage();
|
||||
abstract protected function detail_page(int $cnt, array $listInfo): array;
|
||||
final protected function getSelector(string $content, string $tag, $isViewHTML = false): Crawler
|
||||
final protected function getSelector(string $content, string $tag): Crawler
|
||||
{
|
||||
$crawler = new Crawler($content);
|
||||
if ($this->getDebug()) {
|
||||
log_message("debug", __FUNCTION__ . "=> " . $tag);
|
||||
}
|
||||
$crawler->filter($tag);
|
||||
if ($isViewHTML) {
|
||||
if ($this->getDebug()) {
|
||||
log_message("debug", sprintf(
|
||||
"\n------------%s HTML-------------\n%s\n-----------------------------------------------------\n",
|
||||
__FUNCTION__,
|
||||
@ -33,9 +33,9 @@ abstract class MyCrawler extends CommonLibrary
|
||||
return $crawler->filter($tag);
|
||||
}
|
||||
|
||||
protected function getChangeURL(string $url): string
|
||||
protected function changeURLByMediaType(string $url): string
|
||||
{
|
||||
return $url;
|
||||
return preg_match('/^[^?]+/', $url, $matches) ? $matches[0] : null;
|
||||
}
|
||||
protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
|
||||
{
|
||||
@ -50,8 +50,8 @@ abstract class MyCrawler extends CommonLibrary
|
||||
}
|
||||
break;
|
||||
}
|
||||
if ($url !== null) {
|
||||
$urls[$media_type][] = $this->getChangeURL($url);
|
||||
if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) {
|
||||
$urls[$media_type][] = $this->changeURLByMediaType($matches[0]);
|
||||
} else {
|
||||
log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]\n");
|
||||
log_message("debug", $node->html());
|
||||
|
||||
@ -11,13 +11,13 @@ use App\Traits\ImageTrait;
|
||||
class MangboardStorage extends FileStorage
|
||||
{
|
||||
use ImageTrait;
|
||||
private $_category = "";
|
||||
private $_board_name = "";
|
||||
private $_user_entity = null;
|
||||
private $_fileModel = null;
|
||||
public function __construct(string $category, UserEntity $user_entity)
|
||||
public function __construct(string $board_name, UserEntity $user_entity)
|
||||
{
|
||||
parent::__construct($category);
|
||||
$this->_category = $category;
|
||||
parent::__construct($board_name);
|
||||
$this->_board_name = $board_name;
|
||||
$this->_user_entity = $user_entity;
|
||||
}
|
||||
final protected function getFileModel(): FileModel
|
||||
|
||||
Loading…
Reference in New Issue
Block a user