diff --git a/app/Controllers/CLI/Crawler.php b/app/Controllers/CLI/Crawler.php
index cbf4c8d..107e310 100644
--- a/app/Controllers/CLI/Crawler.php
+++ b/app/Controllers/CLI/Crawler.php
@@ -4,10 +4,8 @@ namespace App\Controllers\CLI;
use App\Models\Mangboard\BoardModel;
-use App\Libraries\YamapLibrary;
use App\Libraries\MyWebLibrary;
-use App\Libraries\MyStorage\FileLibrary;
-use App\Libraries\MyCrawlerLibrary;
+use App\Libraries\MyCrawler\YamapLibrary;
use App\Libraries\Mangboard\BoardLibrary;
use App\Entities\Mangboard\BoardEntity;
use App\Controllers\BaseController;
@@ -20,17 +18,9 @@ class Crawler extends BaseController
$isDebug = in_array("debug", $params);
//1.Yamap사이트에서 자유게시판에서 최근 게시물 데이터 가져오기
if (!in_array("skip_build", $params)) {
- $myWeb = new MyWebLibrary(getenv('yamap.host.url'));
- $storage = new FileLibrary(WRITEPATH . "uploads");
- $storage->setPath("Yamap");
- $crawler = new MyCrawlerLibrary();
-
$yamap = new YamapLibrary();
$yamap->setDebug($isDebug);
- $yamap->setMyWeb($myWeb);
- $yamap->setMyStorage($storage);
- $yamap->setMyCrawler($crawler);
- list($title, $nickname, $mediaInfos, $mediaTags) = $yamap->build();
+ list($title, $nickname, $mediaInfos, $mediaTags) = $yamap->execute();
}
// //2. 사이트 로그인 처리
// if (!in_array("skip_login", $params)) {
@@ -58,7 +48,7 @@ class Crawler extends BaseController
//망보드에 넣기
$board->create($entity);
}
- log_message("notice", "Crawler->yapmap 작업이 완료되었습니다.");
+ log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다.");
return true;
} catch (\Exception $e) {
log_message("error", $e->getMessage());
diff --git a/app/Libraries/CommonLibrary.php b/app/Libraries/CommonLibrary.php
index 55ba6fb..509b427 100644
--- a/app/Libraries/CommonLibrary.php
+++ b/app/Libraries/CommonLibrary.php
@@ -15,10 +15,4 @@ abstract class CommonLibrary
{
$this->_debug = $debug;
}
-
- //url에 http 나 https가 포함되어 있으면 true
- final public function isContainsHttpOrHttps($url): bool
- {
- return strpos($url, 'http://') !== false || strpos($url, 'https://') !== false;
- }
}
diff --git a/app/Libraries/MyCrawler/MyCrawlerLibrary.php b/app/Libraries/MyCrawler/MyCrawlerLibrary.php
new file mode 100644
index 0000000..c87d340
--- /dev/null
+++ b/app/Libraries/MyCrawler/MyCrawlerLibrary.php
@@ -0,0 +1,58 @@
+getMySocket()->getContent($url);
+ if (!$response) {
+ throw new \Exception("getCrawler 실패:{$url}");
+ }
+ $crawler = new Crawler($response);
+ return $crawler->filter($tag);
+ }
+
+    /**
+     * Collects every element below $crawler that matches the selector in $options["tag"].
+     *
+     * One debug log line is written per match, and each match is appended to $nodes
+     * as its own Crawler instance.
+     *
+     * @param Crawler $crawler Root node set to search in.
+     * @param array   $options Must contain "tag": the selector handed to filter().
+     * @param mixed   $nodes   Optional list the matches are appended to (defaults to an empty array).
+     *
+     * @return array $nodes followed by the matched Crawler nodes, in the order each() visits them.
+     */
+    final protected function getNodes(Crawler $crawler, array $options, $nodes = []): array
+    {
+        $selector = $options["tag"];
+        // each() hands back whatever the closure returns, so the closure just returns the match itself.
+        $matches = $crawler->filter($selector)->each(
+            function (Crawler $match) use ($selector): Crawler {
+                log_message("debug", sprintf("getNode-> %s", $selector));
+                return $match;
+            }
+        );
+        foreach ($matches as $match) {
+            $nodes[] = $match;
+        }
+        return $nodes;
+    }
+
+    /**
+     * Downloads the resource referenced by every node matching $options["tag"] and stores it.
+     *
+     * For each matching node the value of the $options["attr"] attribute is fetched through
+     * getMySocket()->download() and written through getMyStorage()->save(). Nodes without a
+     * usable attribute value, and files that could not be saved, are logged and skipped.
+     *
+     * @param Crawler $crawler Root node set to search in.
+     * @param array   $options Must contain "tag" (selector) and "attr" (attribute holding the URL).
+     *
+     * @return array List of ["orignal" => ..., "path" => ..., "fileName" => ...] entries, one per
+     *               file that was saved. The misspelled "orignal" key is kept on purpose because
+     *               existing callers read it under that name.
+     */
+    final protected function download(Crawler $crawler, array $options): array
+    {
+        $downloadInfos = [];
+        foreach ($this->getNodes($crawler, $options) as $node) {
+            $original = $node->attr($options["attr"]);
+            if ($original === null || $original === "") {
+                // attr() yields null when the attribute is absent (e.g. a <video> without src);
+                // there is nothing to fetch, so do not hand null to the socket.
+                log_message(
+                    "debug",
+                    sprintf("download-> skipped: no %s attribute on %s", $options["attr"], $options["tag"])
+                );
+                continue;
+            }
+            [$fileName, $content] = $this->getMySocket()->download($original);
+            $this->getMyStorage()->setFileName($fileName);
+            if (!$this->getMyStorage()->save($content)) {
+                // Best effort: one failed file must not abort the remaining downloads, but leave a trace.
+                log_message("warning", sprintf("download-> save failed: %s", $fileName));
+                continue;
+            }
+            $downloadInfos[] = [
+                // NOTE(review): this stores the node's html(), not the attribute value in $original;
+                // confirm that is what the consumers of "orignal" expect.
+                "orignal" => $node->html(),
+                "path" => $this->getMyStorage()->getPath(),
+                "fileName" => $fileName,
+            ];
+        }
+        return $downloadInfos;
+    }
+}
diff --git a/app/Libraries/MyCrawler/YamapLibrary.php b/app/Libraries/MyCrawler/YamapLibrary.php
new file mode 100644
index 0000000..2d32c33
--- /dev/null
+++ b/app/Libraries/MyCrawler/YamapLibrary.php
@@ -0,0 +1,125 @@
+_mySocket === null) {
+ $this->_mySocket = new WebLibrary(getenv('yamap.host.url'));
+ }
+ return $this->_mySocket;
+ }
+
+    /**
+     * Returns the storage backend, creating it on first use.
+     *
+     * The instance is built from the `yamap.storage.upload.path` environment value and kept
+     * in $_myStorage, so subsequent calls return the same object.
+     */
+    public function getMyStorage()
+    {
+        if ($this->_myStorage !== null) {
+            return $this->_myStorage;
+        }
+        $uploadPath = getenv('yamap.storage.upload.path');
+        $this->_myStorage = new FileLibrary($uploadPath);
+        return $this->_myStorage;
+    }
+
+    /**
+     * Reads the list page and returns the first post that was not written by the excluded author.
+     *
+     * All selectors and the excluded nickname come from `yamap.list.*` environment values.
+     *
+     * @param string $url Address of the list page.
+     *
+     * @return array [title, nickname, url] of the first eligible post.
+     *
+     * @throws \Exception When the page contains no eligible post.
+     */
+    private function mainPage(string $url): array
+    {
+        $crawler = $this->getContent($url, getenv("yamap.list.tag"));
+        $itemTag = getenv("yamap.list.item.tag");
+        $itemLinkTag = getenv("yamap.list.item.link.tag");
+        $itemNicknameTag = getenv("yamap.list.item.nickname.tag");
+        $itemNicknameExcept = getenv("yamap.list.item.nickname.except");
+
+        $lists = [];
+        // Visit every element matching the item selector (e.g. div.bbs_item) and collect the eligible ones.
+        $crawler->filter($itemTag)->each(
+            function (Crawler $node) use ($itemLinkTag, $itemNicknameTag, $itemNicknameExcept, &$lists): void {
+                // Read the author nickname (e.g. span.g_nickname) of this item.
+                $nickname = $node->filter($itemNicknameTag)->text();
+                log_message("debug", $itemNicknameTag . ":" . $nickname);
+                // Strict comparison: loose != treats numeric-looking strings such as "007" and "7" as equal.
+                if ($nickname === $itemNicknameExcept) {
+                    return;
+                }
+                // Not written by the excluded author: take link and title from the item's link element
+                // (e.g. a.list_subject).
+                $linkNode = $node->filter($itemLinkTag);
+                $href = $linkNode->attr("href");
+                $title = $linkNode->children()->last()->text();
+                $lists[] = ['title' => $title, 'nickname' => $nickname, 'url' => $href];
+            }
+        );
+        if (!count($lists)) {
+            throw new \Exception("Target URL이 없습니다.");
+        }
+        $first = $lists[0];
+        return [$first["title"], $first["nickname"], $first["url"]];
+    }
+
+ private function detailPage(string $url): array
+ {
+ $crawler = $this->getContent($url, getenv("yamap.view.content.tag"));
+ $mediaInfos = [];
+ $mediaTags = [];
+ //3. Image 처리
+ $downloadInfos = $this->download($crawler, ["tag" => "img", "attr" => "src"]);
+ foreach ($downloadInfos as $downloadInfo) {
+ if ($this->getMySocket()->isContainsHttpOrHttps($downloadInfo['orignal'])) {
+ $mediaTags[] = $downloadInfos['orignal'];
+ } else {
+ $mediaTags[] = sprintf(
+ "
",
+ $this->getMyStorage()->getUploadPath(),
+ $downloadInfo["path"],
+ $downloadInfo["fileName"],
+ $downloadInfo["fileName"]
+ );
+ };
+ $mediaInfos[] = $downloadInfo;
+ }
+ //4. Video(mp4) 처리
+ $downloadInfos = $this->download($crawler, ["tag" => "video", "attr" => "src"]);
+ foreach ($downloadInfos as $downloadInfo) {
+ if ($this->getMySocket()->isContainsHttpOrHttps($downloadInfo['orignal'])) {
+ $mediaTags[] = $downloadInfos['orignal'];
+ } else {
+ $mediaTags[] = sprintf(
+ "