diff --git a/app/Config/Routes.php b/app/Config/Routes.php
index 6e551ce..0fbe75f 100644
--- a/app/Config/Routes.php
+++ b/app/Config/Routes.php
@@ -33,12 +33,11 @@ $routes->group('mangboard', ['namespace' => 'App\Controllers\Mangboard'], functi
$routes->group('crawler', function ($routes) {
$routes->cli('yamap/(:alpha)', 'CrawlerController::yamap/$1');
$routes->cli('yamap/(:alpha)/(:any)', 'CrawlerController::yamap/$1/$2');
- $routes->cli('yamap/(:alpha)/(:any)/(:any)', 'CrawlerController::yamap/$1/$2/$3');
$routes->cli('yamoon/(:alpha)', 'CrawlerController::yamoon/$1');
$routes->cli('yamoon/(:alpha)/(:any)', 'CrawlerController::yamoon/$1/$2');
- $routes->cli('yamoon/(:alpha)/(:any)/(:any)', 'CrawlerController::yamoon/$1/$2/$3');
$routes->cli('sir/(:alpha)', 'CrawlerController::sir/$1');
$routes->cli('sir/(:alpha)/(:any)', 'CrawlerController::sir/$1/$2');
- $routes->cli('sir/(:alpha)/(:any)/(:any)', 'CrawlerController::sir/$1/$2/$3');
+ $routes->cli('inven/(:alpha)', 'CrawlerController::inven/$1');
+ $routes->cli('inven/(:alpha)/(:any)', 'CrawlerController::inven/$1/$2');
});
});
diff --git a/app/Controllers/Mangboard/CrawlerController.php b/app/Controllers/Mangboard/CrawlerController.php
index c55ad09..f7c7eb2 100644
--- a/app/Controllers/Mangboard/CrawlerController.php
+++ b/app/Controllers/Mangboard/CrawlerController.php
@@ -4,9 +4,10 @@ namespace App\Controllers\Mangboard;
use App\Controllers\CommonController;
use App\Entities\Mangboard\UserEntity;
+use App\Libraries\MyCrawler\Mangboard\InvenCrawler;
+use App\Libraries\MyCrawler\Mangboard\SirCrawler;
use App\Libraries\MyCrawler\Mangboard\YamapCrawler;
use App\Libraries\MyCrawler\Mangboard\YamoonCrawler;
-use App\Libraries\MyCrawler\Mangboard\SirCrawler;
use App\Models\Mangboard\UserModel;
class CrawlerController extends CommonController
@@ -46,13 +47,13 @@ class CrawlerController extends CommonController
log_message("notice", "{$id}로 로그인 성공");
return $user_entity;
}
- public function yamap(string $category, string $id = "", string $option = ""): string
+ public function yamap(string $id = "", string $option = ""): string
{
try {
//1. 사이트 로그인 처리
$user_entity = $this->login($id);
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
- $crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity);
+ $crawler = new YamapCrawler(getenv('yamap.host.url'), getenv("yamap.host.board_name"), $user_entity);
if ($option) {
$crawler->setDebug($option === "debug" ? true : false);
}
@@ -63,13 +64,13 @@ class CrawlerController extends CommonController
return $e->getMessage();
}
}
- public function yamoon(string $category, string $id = "", string $option = ""): string
+ public function yamoon(string $id = "", string $option = ""): string
{
try {
//1. 사이트 로그인 처리
$user_entity = $this->login($id);
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
- $crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity);
+ $crawler = new YamoonCrawler(getenv("yamoon.host.url"), getenv("yamoon.host.board_name"), $user_entity);
if ($option) {
$crawler->setDebug($option === "debug" ? true : false);
}
@@ -80,13 +81,13 @@ class CrawlerController extends CommonController
return $e->getMessage();
}
}
- public function sir(string $category, string $id = "", string $option = ""): string
+ public function sir(string $id = "", string $option = ""): string
{
try {
//1. 사이트 로그인 처리
$user_entity = $this->login($id);
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
- $crawler = new SirCrawler(getenv("sir.host.url"), $category, $user_entity);
+ $crawler = new SirCrawler(getenv("sir.host.url"), getenv("sir.host.board_name"), $user_entity);
if ($option) {
$crawler->setDebug($option === "debug" ? true : false);
}
@@ -97,4 +98,21 @@ class CrawlerController extends CommonController
return $e->getMessage();
}
}
+ public function inven(string $id = "", string $option = ""): string
+ {
+     try {
+         // 1. Log in to the site as $id; the resulting user entity is handed to the crawler below.
+         $user_entity = $this->login($id);
+         // 2. Build the crawler from the inven.* env settings (host URL and target board name).
+         $crawler = new InvenCrawler(getenv("inven.host.url"), getenv("inven.host.board_name"), $user_entity);
+         if ($option) {
+             $crawler->setDebug($option === "debug");
+         }
+         $crawler->execute(intval(getenv("inven.list.max_limit")));
+         return "완료되었습니다.";
+     } catch (\Exception $e) {
+         log_message("error", $e->getMessage());
+         return $e->getMessage();
+     }
+ }
}
diff --git a/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php b/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php
new file mode 100644
index 0000000..93ab301
--- /dev/null
+++ b/app/Libraries/MyCrawler/Mangboard/InvenCrawler.php
@@ -0,0 +1,118 @@
+
+ //
+ //
+ //
+ //
+ //

+ //

+ //

+ //

+ //

+ //
+ //
+ //
^^
+ //
+ //
+ //
+ /**
+  * Fetches one post's detail page, collects its image/video URLs and stores the post.
+  *
+  * The page is read from $listInfo['detail_url'] and narrowed to the element
+  * named by the "inven.view.content.tag" env setting before media URLs are
+  * looked up on <img src> and <video src>.
+  *
+  * @param int   $cnt      Passed through unchanged to backend_process().
+  * @param array $listInfo List-row data; must contain 'detail_url'.
+  * @return array The same $listInfo that was passed in.
+  * @throws \Exception In debug mode (message is a dump of $listInfo and the collected URLs),
+  *                    or when media_process() yields no storages.
+  */
+ protected function detail_page(int $cnt, array $listInfo): array
+ {
+     $html = $this->getMySocket()->getContent($listInfo['detail_url']);
+     $content = $this->getSelector($html, getenv("inven.view.content.tag"));
+     // Images first; the video lookup then appends to the same URL map.
+     $media_urls = $this->getUrlsByMediaType("image", $content, ["tag" => "img", "attr" => "src"]);
+     $media_urls = $this->getUrlsByMediaType("video", $content, ["tag" => "video", "attr" => "src"], $media_urls);
+     // Debug mode never downloads or stores anything: it only reports what was found.
+     if ($this->getDebug()) {
+         throw new \Exception(sprintf(
+             "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
+             __FUNCTION__,
+             var_export($listInfo, true),
+             var_export($media_urls, true)
+         ));
+     }
+     // Download the image/video sources behind the collected URLs.
+     $storages = $this->media_process($media_urls);
+     if (count($storages) === 0) {
+         throw new \Exception("등록할 자료가 없습니다.");
+     }
+     $this->backend_process($cnt, $listInfo, $storages);
+     return $listInfo;
+ }
+ // List content (sample row from the board list page)
+ //
+ //
+ //
+ // | 1589 |
+ //
+ //
+ // |
+ //
+ //
+ // 배수민
+ // |
+ // 09-15 |
+ // 1,502 |
+ // 1 |
+ //
+ //
+ //
+ /**
+  * Reads the board list page, builds one info row per list item and passes
+  * the rows to main_process().
+  *
+  * All selectors and URLs come from inven.* env settings. In debug mode
+  * detail_page() is called first with the "inven.view.test.url" page.
+  * Any \Exception raised along the way is caught here and logged at
+  * "warning" level; nothing is rethrown to the caller.
+  *
+  * @param int $max_limit Passed through unchanged to main_process().
+  */
+ public function execute(int $max_limit): void
+ {
+     try {
+         if ($this->getDebug()) {
+             $this->detail_page(1, ['detail_url' => getenv("inven.view.test.url")]);
+         }
+         $listInfos = [];
+         $listHtml = $this->getMySocket()->getContent(getenv("inven.list.url"));
+         // Visit every node matched by the configured list selector and record its fields.
+         $rows = $this->getSelector($listHtml, getenv("inven.list.tag"));
+         $rows->each(function (Crawler $row) use (&$listInfos): void {
+             $hit = $row->filter(getenv("inven.list.item.hit.tag"))->text();
+             // The list shows the date without a year, so the current year is prefixed.
+             $date = sprintf("%s-%s", date("Y"), $row->filter(getenv("inven.list.item.date.tag"))->text());
+             $nickname = $row->filter(getenv("inven.list.item.nickname.tag"))->text();
+             // The link node supplies both the detail URL (href) and the title (text of its last child).
+             $anchor = $row->filter(getenv("inven.list.item.link.tag"));
+             $detail_url = $anchor->attr("href");
+             $title = $anchor->children()->last()->text();
+             $listInfos[] = compact('title', 'nickname', 'detail_url', 'date', 'hit');
+         });
+         if (empty($listInfos)) {
+             throw new \Exception("Target URL이 없습니다.");
+         }
+         $this->main_process($max_limit, $listInfos);
+         log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
+     } catch (\Exception $exception) {
+         $report = sprintf(
+             "\n---%s 오류---\n%s\n-----------------------------------------\n",
+             __FUNCTION__,
+             $exception->getMessage()
+         );
+         log_message("warning", $report);
+     }
+ }
+}
diff --git a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php
index 3f1fe9a..97e7213 100644
--- a/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/MangboardCrawler.php
@@ -13,13 +13,13 @@ abstract class MangboardCrawler extends MyCrawler
{
protected $_mySocket = null;
protected $_host = "";
- protected $_category = "";
- protected $_user_entity = null;
- protected function __construct(string $host, string $category, UserEntity $user_entity)
+ private $_board_name = "";
+ private $_user_entity = null;
+ protected function __construct(string $host, string $board_name, UserEntity $user_entity)
{
parent::__construct();
$this->_host = $host;
- $this->_category = $category;
+ $this->_board_name = $board_name;
$this->_user_entity = $user_entity;
}
protected function getMySocket()
@@ -31,14 +31,14 @@ abstract class MangboardCrawler extends MyCrawler
}
final protected function createMyStorage()
{
- return new MangboardStorage($this->_category, $this->_user_entity);
+ return new MangboardStorage($this->_board_name, $this->_user_entity);
}
protected function backend_process(int $cnt, array $listInfo, array $storages)
{
//File DB 및 Board DB 등록작업등
- $baord_name = "board_" . $this->_category;
+ $baord_name = $this->_board_name;
$boardsModel = new BoardsModel();
- $boards_entity = $boardsModel->getEntityByID("board_" . $this->_category);
+ $boards_entity = $boardsModel->getEntityByID($this->_board_name);
$boardModel = new BoardModel("mb_" . $baord_name);
$board_entity = $boardModel->createByCrawler(
$boards_entity,
diff --git a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
index 92e846f..555652c 100644
--- a/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/SirCrawler.php
@@ -9,13 +9,13 @@ use Symfony\Component\DomCrawler\Crawler;
class SirCrawler extends MangboardCrawler
{
- public function __construct(string $host, string $category, UserEntity $user_entity)
+ public function __construct(string $host, string $board_name, UserEntity $user_entity)
{
- parent::__construct($host, $category, $user_entity);
+ parent::__construct($host, $board_name, $user_entity);
}
- protected function getChangeURL(string $url): string
+ protected function changeURLByMediaType(string $url): string
{
- return str_replace("/sir.kr/", "", $url);
+ return str_replace("/sir.kr/", "", parent::changeURLByMediaType($url));
}
//작성내용
//
@@ -151,7 +151,7 @@ class SirCrawler extends MangboardCrawler
function (Crawler $node) use (&$listInfos): void {
$link_node = $node->filter(getenv("sir.list.item.link.tag"));
// href url의 맨 앞이 /가 두개라서 한개를 빼기위함
- $detail_url = $this->getChangeURL($link_node->attr("href"));
+ $detail_url = str_replace("/sir.kr/", "", $link_node->attr("href"));
// $detail_url = $link_node->attr("href");
$title = $link_node->text();
$nickname = $node->filter(getenv("sir.list.item.nickname.tag"))->text();
diff --git a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php
index e3d2764..4e59bb0 100644
--- a/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/YamapCrawler.php
@@ -7,9 +7,9 @@ use Symfony\Component\DomCrawler\Crawler;
class YamapCrawler extends MangboardCrawler
{
- public function __construct(string $host, string $category, UserEntity $user_entity)
+ public function __construct(string $host, string $board_name, UserEntity $user_entity)
{
- parent::__construct($host, $category, $user_entity);
+ parent::__construct($host, $board_name, $user_entity);
}
protected function detail_page(int $cnt, array $listInfo): array
{
diff --git a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
index 7dc4bfe..41a760d 100644
--- a/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
+++ b/app/Libraries/MyCrawler/Mangboard/YamoonCrawler.php
@@ -7,9 +7,9 @@ use Symfony\Component\DomCrawler\Crawler;
class YamoonCrawler extends MangboardCrawler
{
- public function __construct(string $host, string $category, UserEntity $user_entity)
+ public function __construct(string $host, string $board_name, UserEntity $user_entity)
{
- parent::__construct($host, $category, $user_entity);
+ parent::__construct($host, $board_name, $user_entity);
}
//작성내용
//
diff --git a/app/Libraries/MyCrawler/MyCrawler.php b/app/Libraries/MyCrawler/MyCrawler.php
index 130e53f..712a884 100644
--- a/app/Libraries/MyCrawler/MyCrawler.php
+++ b/app/Libraries/MyCrawler/MyCrawler.php
@@ -16,14 +16,14 @@ abstract class MyCrawler extends CommonLibrary
abstract protected function getMySocket();
abstract protected function createMyStorage();
abstract protected function detail_page(int $cnt, array $listInfo): array;
- final protected function getSelector(string $content, string $tag, $isViewHTML = false): Crawler
+ final protected function getSelector(string $content, string $tag): Crawler
{
$crawler = new Crawler($content);
if ($this->getDebug()) {
log_message("debug", __FUNCTION__ . "=> " . $tag);
}
$crawler->filter($tag);
- if ($isViewHTML) {
+ if ($this->getDebug()) {
log_message("debug", sprintf(
"\n------------%s HTML-------------\n%s\n-----------------------------------------------------\n",
__FUNCTION__,
@@ -33,9 +33,9 @@ abstract class MyCrawler extends CommonLibrary
return $crawler->filter($tag);
}
- protected function getChangeURL(string $url): string
+ protected function changeURLByMediaType(string $url): string
{
- return $url;
+ // Drop the query string; when nothing precedes "?" return $url unchanged, because null would violate the declared string return type.
+ return preg_match('/^[^?]+/', $url, $matches) === 1 ? $matches[0] : $url;
}
protected function getUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
{
@@ -50,8 +50,8 @@ abstract class MyCrawler extends CommonLibrary
}
break;
}
- if ($url !== null) {
- $urls[$media_type][] = $this->getChangeURL($url);
+ if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) {
+ $urls[$media_type][] = $this->changeURLByMediaType($matches[0]);
} else {
log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]\n");
log_message("debug", $node->html());
diff --git a/app/Libraries/MyStorage/MangboardStorage.php b/app/Libraries/MyStorage/MangboardStorage.php
index f3971c0..75fc286 100644
--- a/app/Libraries/MyStorage/MangboardStorage.php
+++ b/app/Libraries/MyStorage/MangboardStorage.php
@@ -11,13 +11,13 @@ use App\Traits\ImageTrait;
class MangboardStorage extends FileStorage
{
use ImageTrait;
- private $_category = "";
+ private $_board_name = "";
private $_user_entity = null;
private $_fileModel = null;
- public function __construct(string $category, UserEntity $user_entity)
+ public function __construct(string $board_name, UserEntity $user_entity)
{
- parent::__construct($category);
- $this->_category = $category;
+ parent::__construct($board_name);
+ $this->_board_name = $board_name;
$this->_user_entity = $user_entity;
}
final protected function getFileModel(): FileModel