Automation init...1

This commit is contained in:
최준흠 2024-09-07 15:13:03 +09:00
parent b698eb6f71
commit b33eead7a9
10 changed files with 220 additions and 260 deletions

View File

@ -4,10 +4,8 @@ namespace App\Controllers\CLI;
use App\Models\Mangboard\BoardModel; use App\Models\Mangboard\BoardModel;
use App\Libraries\YamapLibrary;
use App\Libraries\MyWebLibrary; use App\Libraries\MyWebLibrary;
use App\Libraries\MyStorage\FileLibrary; use App\Libraries\MyCrawler\YamapLibrary;
use App\Libraries\MyCrawlerLibrary;
use App\Libraries\Mangboard\BoardLibrary; use App\Libraries\Mangboard\BoardLibrary;
use App\Entities\Mangboard\BoardEntity; use App\Entities\Mangboard\BoardEntity;
use App\Controllers\BaseController; use App\Controllers\BaseController;
@ -20,17 +18,9 @@ class Crawler extends BaseController
$isDebug = in_array("debug", $params); $isDebug = in_array("debug", $params);
//1.Yamap사이트에서 자유게시판에서 최근 게시물 데이터 가져오기 //1.Yamap사이트에서 자유게시판에서 최근 게시물 데이터 가져오기
if (!in_array("skip_build", $params)) { if (!in_array("skip_build", $params)) {
$myWeb = new MyWebLibrary(getenv('yamap.host.url'));
$storage = new FileLibrary(WRITEPATH . "uploads");
$storage->setPath("Yamap");
$crawler = new MyCrawlerLibrary();
$yamap = new YamapLibrary(); $yamap = new YamapLibrary();
$yamap->setDebug($isDebug); $yamap->setDebug($isDebug);
$yamap->setMyWeb($myWeb); list($title, $nickname, $mediaInfos, $mediaTags) = $yamap->execute();
$yamap->setMyStorage($storage);
$yamap->setMyCrawler($crawler);
list($title, $nickname, $mediaInfos, $mediaTags) = $yamap->build();
} }
// //2. 사이트 로그인 처리 // //2. 사이트 로그인 처리
// if (!in_array("skip_login", $params)) { // if (!in_array("skip_login", $params)) {
@ -58,7 +48,7 @@ class Crawler extends BaseController
//망보드에 넣기 //망보드에 넣기
$board->create($entity); $board->create($entity);
} }
log_message("notice", "Crawler->yapmap 작업이 완료되었습니다."); log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다.");
return true; return true;
} catch (\Exception $e) { } catch (\Exception $e) {
log_message("error", $e->getMessage()); log_message("error", $e->getMessage());

View File

@ -15,10 +15,4 @@ abstract class CommonLibrary
{ {
$this->_debug = $debug; $this->_debug = $debug;
} }
//url에 http 나 https가 포함되어 있으면 true
final public function isContainsHttpOrHttps($url): bool
{
return strpos($url, 'http://') !== false || strpos($url, 'https://') !== false;
}
} }

View File

@ -0,0 +1,58 @@
<?php
namespace App\Libraries\MyCrawler;
use Symfony\Component\DomCrawler\Crawler;
use App\Libraries\CommonLibrary;
/**
 * Base class for site-specific crawlers.
 *
 * A concrete crawler supplies a socket (used to fetch pages and download
 * files) and a storage (used to persist downloaded content); this class
 * provides the shared fetch / node-collection / download helpers on top of
 * those two collaborators.
 */
abstract class MyCrawlerLibrary extends CommonLibrary
{
    protected function __construct()
    {
        parent::__construct();
    }

    /** Returns the socket object; it must expose getContent($url) and download($url). */
    abstract public function getMySocket();

    /** Returns the storage object; it must expose setFileName(), save() and getPath(). */
    abstract public function getMyStorage();

    /** Runs the crawl and returns its result as an array. */
    abstract public function execute(): array;

    /**
     * Fetches $url through the socket and returns the part of the document
     * matching the $tag selector.
     *
     * @param string $url Address handed to the socket's getContent().
     * @param string $tag Selector applied to the fetched document.
     * @return Crawler The filtered node set.
     * @throws \Exception When the socket returns an empty/falsy response.
     */
    final protected function getContent(string $url, string $tag): Crawler
    {
        $response = $this->getMySocket()->getContent($url);
        if (!$response) {
            throw new \Exception("getCrawler 실패:{$url}");
        }
        $crawler = new Crawler($response);
        return $crawler->filter($tag);
    }

    /**
     * Collects every node below $crawler that matches $options["tag"].
     *
     * @param Crawler $crawler Node set to search in.
     * @param array   $options Must contain the key "tag" (selector).
     * @param array   $nodes   Optional initial list the matches are appended to.
     * @return array The initial list followed by one Crawler per matching node.
     */
    final protected function getNodes(Crawler $crawler, array $options, $nodes = []): array
    {
        // $options is only read inside the closure, so it is captured by value;
        // $nodes is the accumulator and has to be captured by reference.
        $crawler->filter($options["tag"])->each(
            function (Crawler $node) use ($options, &$nodes): void {
                log_message("debug", sprintf("getNode-> %s", $options["tag"]));
                $nodes[] = $node;
            }
        );
        return $nodes;
    }

    /**
     * Downloads the resource referenced by every matching node and saves it
     * through the storage.
     *
     * @param Crawler $crawler Node set to search in.
     * @param array   $options Must contain "tag" (selector) and "attr"
     *                         (attribute holding the resource URL).
     * @return array One entry per successfully saved file, with the keys
     *               "orignal" (sic), "path" and "fileName".
     */
    final protected function download(Crawler $crawler, array $options): array
    {
        $downloadInfos = [];
        $nodes = $this->getNodes($crawler, $options);
        foreach ($nodes as $node) {
            $original = $node->attr($options["attr"]);
            // attr() yields null when the attribute is absent (for example a
            // <video> that uses <source> children). Passing null to the
            // string-typed download() raises a TypeError, which is not an
            // \Exception, so skip such nodes instead of aborting the crawl.
            if ($original === null || $original === "") {
                log_message(
                    "debug",
                    sprintf("download-> %s without %s skipped", $options["tag"], $options["attr"])
                );
                continue;
            }
            list($fileName, $content) = $this->getMySocket()->download($original);
            $this->getMyStorage()->setFileName($fileName);
            if (!$this->getMyStorage()->save($content)) {
                // Nothing was stored for this node, so it is not reported.
                continue;
            }
            $downloadInfos[] = [
                // The misspelled key is kept on purpose: callers read "orignal".
                // NOTE(review): html() returns the node's inner HTML, which is
                // empty for <img>; confirm whether the outer HTML was intended.
                "orignal" => $node->html(),
                "path" => $this->getMyStorage()->getPath(),
                "fileName" => $fileName,
            ];
        }
        return $downloadInfos;
    }
}

View File

@ -0,0 +1,125 @@
<?php
namespace App\Libraries\MyCrawler;
use Symfony\Component\DomCrawler\Crawler;
use App\Libraries\MySocket\WebLibrary;
use App\Libraries\MyStorage\FileLibrary;
/**
 * Crawler for the Yamap site.
 *
 * Picks a post from the list page (or a fixed test post in debug mode),
 * downloads the images and videos found in that post, and returns the post
 * metadata together with HTML tags for the downloaded media. All selectors
 * and URLs are read from environment settings (yamap.*).
 */
class YamapLibrary extends MyCrawlerLibrary
{
    private $_mySocket = null;
    private $_myStorage = null;

    public function __construct()
    {
        parent::__construct();
    }

    /** Lazily creates the web socket bound to the configured Yamap host. */
    public function getMySocket()
    {
        if ($this->_mySocket === null) {
            $this->_mySocket = new WebLibrary(getenv('yamap.host.url'));
        }
        return $this->_mySocket;
    }

    /** Lazily creates the file storage bound to the configured upload path. */
    public function getMyStorage()
    {
        if ($this->_myStorage === null) {
            $this->_myStorage = new FileLibrary(getenv('yamap.storage.upload.path'));
        }
        return $this->_myStorage;
    }

    /**
     * Reads the list page and returns the first post whose author is not the
     * excluded nickname.
     *
     * @param string $url Address of the list page.
     * @return array [title, nickname, url] of the selected post.
     * @throws \Exception When no eligible post is found.
     */
    private function mainPage(string $url): array
    {
        $crawler = $this->getContent($url, getenv("yamap.list.tag"));
        $item_tag = getenv("yamap.list.item.tag");
        $item_link_tag = getenv("yamap.list.item.link.tag");
        $item_nickname_tag = getenv("yamap.list.item.nickname.tag");
        $item_nickname_except = getenv("yamap.list.item.nickname.except");
        $lists = [];
        // Visit every list item matched by the item selector.
        $crawler->filter($item_tag)->each(
            function (Crawler $node) use (
                $item_link_tag,
                $item_nickname_tag,
                $item_nickname_except,
                &$lists
            ): void {
                // Read the author nickname of this item.
                $nickname = $node->filter($item_nickname_tag)->text();
                log_message("debug", $item_nickname_tag . ":" . $nickname);
                if ($nickname != $item_nickname_except) {
                    // Not written by the excluded author: take the link target
                    // and the text of the link's last child as the title.
                    $link_node = $node->filter($item_link_tag);
                    $url = $link_node->attr("href");
                    $title = $link_node->children()->last()->text();
                    $lists[] = ['title' => $title, 'nickname' => $nickname, 'url' => $url];
                }
            }
        );
        if (!count($lists)) {
            throw new \Exception("Target URL이 없습니다.");
        }
        return array($lists[0]["title"], $lists[0]["nickname"], $lists[0]["url"]);
    }

    /**
     * Downloads every $tag element's "src" resource and builds one HTML tag
     * per downloaded file.
     *
     * @param Crawler $crawler Content node set of the detail page.
     * @param string  $tag     Element name to collect ("img" or "video").
     * @param string  $format  sprintf() format taking upload path, path,
     *                         file name and file name (for alt), in that order.
     * @return array [downloadInfos, mediaTags]
     */
    private function collectMedia(Crawler $crawler, string $tag, string $format): array
    {
        $mediaInfos = [];
        $mediaTags = [];
        $downloadInfos = $this->download($crawler, ["tag" => $tag, "attr" => "src"]);
        foreach ($downloadInfos as $downloadInfo) {
            if ($this->getMySocket()->isContainsHttpOrHttps($downloadInfo['orignal'])) {
                // Use the current item's markup. The previous code indexed the
                // whole $downloadInfos list with 'orignal', which yields null.
                $mediaTags[] = $downloadInfo['orignal'];
            } else {
                $mediaTags[] = sprintf(
                    $format,
                    $this->getMyStorage()->getUploadPath(),
                    $downloadInfo["path"],
                    $downloadInfo["fileName"],
                    $downloadInfo["fileName"]
                );
            }
            $mediaInfos[] = $downloadInfo;
        }
        return [$mediaInfos, $mediaTags];
    }

    /**
     * Processes the detail page: downloads its images, then its videos.
     *
     * @param string $url Address of the post.
     * @return array [mediaInfos, mediaTags], images first, videos after.
     */
    private function detailPage(string $url): array
    {
        $crawler = $this->getContent($url, getenv("yamap.view.content.tag"));
        // 3. Images
        list($imageInfos, $imageTags) = $this->collectMedia(
            $crawler,
            "img",
            "<img src=\"/%s/%s/%s\" alt=\"%s\">"
        );
        // 4. Videos (mp4)
        list($videoInfos, $videoTags) = $this->collectMedia(
            $crawler,
            "video",
            "<video src=\"/%s/%s/%s\" alt=\"%s\" controls autoplay>"
        );
        $mediaInfos = array_merge($imageInfos, $videoInfos);
        $mediaTags = array_merge($imageTags, $videoTags);
        log_message("debug", "-----mediaTags-----");
        log_message("debug", var_export($mediaTags, true));
        return array($mediaInfos, $mediaTags);
    }

    /**
     * Runs the crawl.
     *
     * @return array [title, nickname, mediaInfos, mediaTags]
     */
    public function execute(): array
    {
        // 1. Main page: in debug mode use the configured test post instead of
        //    reading the list page.
        if ($this->getDebug()) {
            $title = getenv("yamap.view.test.title");
            $nickname = getenv("yamap.view.test.nickname");
            $detail_url = getenv("yamap.view.test.url");
        } else {
            list($title, $nickname, $detail_url) = $this->mainPage(getenv("yamap.list.url"));
        }
        // 2. Detail page: process the content area of the selected post.
        list($mediaInfos, $mediaTags) = $this->detailPage($detail_url);
        return array($title, $nickname, $mediaInfos, $mediaTags);
    }
}

View File

@ -1,27 +0,0 @@
<?php
namespace App\Libraries;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Thin helper around Symfony's DomCrawler: builds a Crawler from HTML and
 * collects matching nodes.
 */
class MyCrawlerLibrary extends CommonLibrary
{
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Wraps the given HTML in a new Crawler.
     *
     * @param mixed $html Markup handed to the Crawler constructor.
     */
    final public function create($html): Crawler
    {
        $crawler = new Crawler($html);
        return $crawler;
    }

    /**
     * Appends every node matching $options["tag"] to $nodes and returns the list.
     *
     * @param Crawler $crawler Node set to search in.
     * @param array   $options Must contain the key "tag" (selector).
     * @param array   $nodes   Optional initial list.
     * @return array Initial list followed by one Crawler per match.
     */
    public function getNodes(Crawler $crawler, array $options, $nodes = []): array
    {
        $selector = $options["tag"];
        // each() returns the closure's results in document order; log once per
        // node while collecting, then append in that same order.
        $matches = $crawler->filter($selector)->each(
            function (Crawler $match) use ($selector) {
                log_message("debug", sprintf("getNode-> %s", $selector));
                return $match;
            }
        );
        foreach ($matches as $match) {
            $nodes[] = $match;
        }
        return $nodes;
    }
}

View File

@ -0,0 +1,13 @@
<?php
namespace App\Libraries\MySocket;
use App\Libraries\CommonLibrary;
/**
 * Common base for socket-style libraries.
 *
 * Adds nothing of its own; it only forwards construction to CommonLibrary so
 * concrete sockets share one parent type.
 */
class MySocketLibrary extends CommonLibrary
{
    /** Delegates to the CommonLibrary constructor. */
    public function __construct()
    {
        parent::__construct();
    }
}

View File

@ -1,11 +1,11 @@
<?php <?php
namespace App\Libraries; namespace App\Libraries\MySocket;
use GuzzleHttp\Cookie\CookieJar; use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Client; use GuzzleHttp\Client;
class MyWebLibrary extends CommonLibrary class WebLibrary extends MySocketLibrary
{ {
private $_client = null; private $_client = null;
private $_cookieJar = null; private $_cookieJar = null;
@ -37,6 +37,12 @@ class MyWebLibrary extends CommonLibrary
return $this->_cookieJar; return $this->_cookieJar;
} }
//url에 http 나 https가 포함되어 있으면 true
final public function isContainsHttpOrHttps($url): bool
{
return strpos($url, 'http://') !== false || strpos($url, 'https://') !== false;
}
public function getContent(string $url, array $options = []): string public function getContent(string $url, array $options = []): string
{ {
//url에 http 나 https가 포함되어 있지않으면 //url에 http 나 https가 포함되어 있지않으면
@ -75,11 +81,11 @@ class MyWebLibrary extends CommonLibrary
// 파일 다운로드 메서드 // 파일 다운로드 메서드
public function download(string $url): array public function download(string $url): array
{ {
log_message("debug", "donwload:URL-> " . $url);
$fileNames = explode('/', $url); $fileNames = explode('/', $url);
if (!is_array($fileNames) || !count($fileNames)) { if (!is_array($fileNames) || !count($fileNames)) {
throw new \Exception("Download URL Error:" . $url); throw new \Exception("Socket URL Error:" . $this->getHost() . $url);
} }
log_message("debug", "Socket URL-> " . $this->getHost() . $url);
$fileName = array_pop($fileNames); $fileName = array_pop($fileNames);
if (!$this->getDebug()) { if (!$this->getDebug()) {
$content = $this->getContent($url, [ $content = $this->getContent($url, [

View File

@ -4,28 +4,18 @@ namespace App\Libraries\MyStorage;
class FileLibrary extends MyStorageLibrary class FileLibrary extends MyStorageLibrary
{ {
private $_uploadPath = "";
private $_path = ""; private $_path = "";
private $_fileName = ""; private $_fileName = "";
public function __construct($uploadPath) public function __construct(string $path)
{ {
parent::__construct(); parent::__construct();
$this->_uploadPath = $uploadPath; $this->_path = $path;
}
final public function getUploadPath(): string
{
return $this->_uploadPath;
} }
final public function getPath(): string final public function getPath(): string
{ {
return $this->_path; return $this->_path;
} }
final public function setPath(string $path): void
{
$this->_path = $path;
}
final public function getFileName(): string final public function getFileName(): string
{ {
return $this->_fileName; return $this->_fileName;
@ -37,14 +27,14 @@ class FileLibrary extends MyStorageLibrary
final public function save($content): bool final public function save($content): bool
{ {
$fullPath = $this->getUploadPath() . DIRECTORY_SEPARATOR . $this->getPath(); $fullPath = WRITEPATH . $this->getUploadPath() . DIRECTORY_SEPARATOR . $this->getPath();
if (!is_dir($fullPath)) { if (!is_dir($fullPath)) {
if (!mkdir($fullPath)) { if (!mkdir($fullPath)) {
throw new \Exception("Make Directory Error:" . $fullPath); throw new \Exception("Make Directory Error:" . $fullPath);
} }
} }
$fileName = $fullPath . DIRECTORY_SEPARATOR . $this->getFileName(); $saveFile = $fullPath . DIRECTORY_SEPARATOR . $this->getFileName();
log_message("debug", "download:SavePath-> " . $fileName); log_message("debug", "Storage Save-> " . $saveFile);
return file_put_contents($fileName, $content); return file_put_contents($saveFile, $content);
} }
} }

View File

@ -6,10 +6,15 @@ use App\Libraries\CommonLibrary;
abstract class MyStorageLibrary extends CommonLibrary abstract class MyStorageLibrary extends CommonLibrary
{ {
private $_uploadPath = "uploads";
protected function __construct() protected function __construct()
{ {
parent::__construct(); parent::__construct();
} }
abstract public function save($content): bool; abstract public function save($content): bool;
final public function getUploadPath(): string
{
return $this->_uploadPath;
}
} }

View File

@ -1,194 +0,0 @@
<?php
namespace App\Libraries;
use Symfony\Component\DomCrawler\Crawler;
//Yamap
// define('YAMAP', [
// 'host' => ['url' => getenv('yamap.host.url')],
// 'list' => [
// 'url' => getenv('yamap.list.url'),
// 'tag' => getenv('yamap.list.tag'),
// 'item' => [
// 'tag' => getenv('yamap.list.item.tag'),
// 'subject' => [
// 'tag' => getenv('yamap.list.item.subject.tag')
// ],
// 'nickname' => [
// 'tag' => getenv('yamap.list.item.nickname.tag'),
// 'except' => getenv('yamap.list.item.nickname.except'),
// ],
// ],
// ],
// 'view' => [
// 'tag' => getenv('yamap.view.tag'),
// 'content' => [
// 'tag' => getenv('yamap.view.content.tag'),
// ],
// 'test' => [
// 'url' => getenv('yamap.view.test.url'),
// ]
// ],
// ]);
/**
 * Yamap crawler (legacy variant with injected collaborators).
 *
 * The web client, storage and crawler helper are supplied through setters
 * before build() is called; each getter throws when its collaborator is
 * missing.
 */
class YamapLibrary extends CommonLibrary
{
    private $_myWeb = null;
    private $_myStorage = null;
    private $_myCrawler = null;

    public function __construct()
    {
        parent::__construct();
    }

    /** @throws \Exception When no web library has been set. */
    public function getMyWeb()
    {
        if ($this->_myWeb === null) {
            throw new \Exception("MyWeb Library가 정의되지 않았습니다.");
        }
        return $this->_myWeb;
    }

    public function setMyWeb($myWeb)
    {
        $this->_myWeb = $myWeb;
    }

    /** @throws \Exception When no storage library has been set. */
    public function getMyStorage()
    {
        if ($this->_myStorage === null) {
            throw new \Exception("MyStorage Library가 정의되지 않았습니다.");
        }
        return $this->_myStorage;
    }

    public function setMyStorage($myStorage)
    {
        $this->_myStorage = $myStorage;
    }

    /** @throws \Exception When no crawler library has been set. */
    public function getMyCrawler()
    {
        // Check the crawler itself. The previous code tested $_myWeb, so a
        // missing crawler went unnoticed and null was returned.
        if ($this->_myCrawler === null) {
            throw new \Exception("MyCrawler Library가 정의되지 않았습니다.");
        }
        return $this->_myCrawler;
    }

    public function setMyCrawler($myCrawler)
    {
        $this->_myCrawler = $myCrawler;
    }

    /**
     * Fetches $url and returns the part of the document matching $tag.
     *
     * @throws \Exception When the web library returns an empty/falsy response.
     */
    private function getCrawler(string $url, string $tag): Crawler
    {
        $response = $this->getMyWeb()->getContent($url);
        if (!$response) {
            throw new \Exception("getCrawler 실패:{$url}");
        }
        return $this->getMyCrawler()->create($response)->filter($tag);
    }

    /**
     * Downloads and stores the resource referenced by every matching node.
     *
     * @param Crawler $crawler Node set to search in.
     * @param array   $options Must contain "tag" (selector) and "attr"
     *                         (attribute holding the resource URL).
     * @return array One entry per saved file with the keys "orignal" (sic),
     *               "path", "fileName" and "content".
     */
    private function download_process(Crawler $crawler, array $options): array
    {
        $mediaInfos = [];
        log_message("debug", "download:{$options["tag"]},{$options["attr"]}");
        $nodes = $this->getMyCrawler()->getNodes($crawler, $options);
        foreach ($nodes as $node) {
            $original = $node->attr($options["attr"]);
            list($fileName, $content) = $this->getMyWeb()->download($original);
            $this->getMyStorage()->setFileName($fileName);
            if (!$this->getMyStorage()->save($content)) {
                // Nothing was stored for this node, so it is not reported.
                continue;
            }
            $mediaInfos[] = [
                "orignal" => $node->html(),
                "path" => $this->getMyStorage()->getPath(),
                "fileName" => $fileName,
                "content" => $content
            ];
        }
        return $mediaInfos;
    }

    /**
     * Reads the configured list page and returns the first post whose author
     * is not the excluded nickname.
     *
     * @return array [title, nickname, url]
     * @throws \Exception When no eligible post is found.
     */
    public function mainPage(): array
    {
        $url = getenv("yamap.list.url");
        $crawler = $this->getCrawler($url, getenv("yamap.list.tag"));
        $item_tag = getenv("yamap.list.item.tag");
        $item_link_tag = getenv("yamap.list.item.link.tag");
        $item_nickname_tag = getenv("yamap.list.item.nickname.tag");
        $item_nickname_except = getenv("yamap.list.item.nickname.except");
        $lists = [];
        // Visit every list item matched by the item selector.
        $crawler->filter($item_tag)->each(
            function (Crawler $node) use (
                &$item_link_tag,
                &$item_nickname_tag,
                &$item_nickname_except,
                &$lists
            ): void {
                // Read the author nickname of this item.
                $nickname = $node->filter($item_nickname_tag)->text();
                log_message("debug", $item_nickname_tag . ":" . $nickname);
                if ($nickname != $item_nickname_except) {
                    // Not written by the excluded author: take the link target
                    // and the text of the link's last child as the title.
                    $link_node = $node->filter($item_link_tag);
                    $url = $link_node->attr("href");
                    $title = $link_node->children()->last()->text();
                    $lists[] = ['title' => $title, 'nickname' => $nickname, 'url' => $url];
                }
            }
        );
        if (!count($lists)) {
            throw new \Exception("Target URL이 없습니다.");
        }
        return array($lists[0]["title"], $lists[0]["nickname"], $lists[0]["url"]);
    }

    /**
     * Processes the detail page: downloads its images and videos and builds
     * one HTML tag per downloaded file.
     *
     * @param string $url Address of the post.
     * @return array [mediaInfos, mediaTags]
     */
    public function detailPage(string $url): array
    {
        $crawler = $this->getCrawler($url, getenv("yamap.view.content.tag"));
        $mediaTags = [];
        // 3. Images
        $images = $this->download_process($crawler, ["tag" => "img", "attr" => "src"]);
        foreach ($images as $image) {
            if ($this->isContainsHttpOrHttps($image['orignal'])) {
                // Read from the current item; the list itself has no 'orignal' key.
                $mediaTags[] = $image['orignal'];
            } else {
                $mediaTags[] = sprintf("<img src=\"/uploads/%s/%s\" alt=\"%s\">", $image["path"], $image["fileName"], $image["fileName"]);
            }
        }
        // 4. Videos (mp4)
        $videos = $this->download_process($crawler, ["tag" => "video", "attr" => "src"]);
        foreach ($videos as $video) {
            if ($this->isContainsHttpOrHttps($video['orignal'])) {
                // Read from the current item; the list itself has no 'orignal' key.
                $mediaTags[] = $video['orignal'];
            } else {
                $mediaTags[] = sprintf("<video src=\"/uploads/%s/%s\" alt=\"%s\" controls autoplay>", $video["path"], $video["fileName"], $video["fileName"]);
            }
        }
        $mediaInfos = array_merge($images, $videos);
        log_message("debug", "-----mediaInfos-----");
        foreach ($mediaInfos as $mediaInfo) {
            log_message("debug", "fileName: " . $mediaInfo["path"] . DIRECTORY_SEPARATOR . $mediaInfo['fileName']);
        }
        log_message("debug", "-----mediaTags-----");
        log_message("debug", var_export($mediaTags, true));
        return array($mediaInfos, $mediaTags);
    }

    /**
     * Runs the crawl.
     *
     * @return array [title, nickname, mediaInfos, mediaTags]
     */
    public function build(): array
    {
        // 1. Main page: in debug mode use the configured test post instead of
        //    reading the list page.
        if ($this->getDebug()) {
            $title = getenv("yamap.view.test.title");
            $nickname = getenv("yamap.view.test.nickname");
            $detail_url = getenv("yamap.view.test.url");
        } else {
            list($title, $nickname, $detail_url) = $this->mainPage();
        }
        // 2. Detail page: process the content area of the selected post.
        list($mediaInfos, $mediaTags) = $this->detailPage($detail_url);
        return array($title, $nickname, $mediaInfos, $mediaTags);
    }
}