Automation init...

This commit is contained in:
최준흠 2024-09-04 18:59:45 +09:00
parent c33825df91
commit 749c691e11
12 changed files with 231 additions and 311 deletions

View File

@ -35,7 +35,7 @@ class UserController extends AdminController
}
$sign = $this->request->getPost('point') ?: "+";
$entity = $this->setUserPointByMangboardTrait($entity, intval($point), $sign);
$entity = $this->setUserPointByMangboard($entity, intval($point), $sign);
return "완료되었습니다.";
} catch (\Exception $e) {
log_message("error", $e->getMessage());

View File

@ -14,19 +14,25 @@ class Crawler extends BaseController
$library = new YamapLibrary(getenv("yamap.host"));
$library->setDebug($isDebug);
//1. MainPage
$url = getenv("yamap.url.main");
$crawler = $library->getCrawlerByMainPage($url);
$links = $library->getLinks($crawler);
if (!count($links)) {
throw new \Exception("Target Links가 없습니다.");
$url = getenv("yamap.list.url");
$crawler = $library->getCrawler($url, getenv("yamap.list.tag"));
$urls = $library->getListURLs(
$crawler,
getenv("yamap.list.item.tag"),
getenv("yamap.list.item.subject.tag"),
getenv("yamap.list.item.nickname.tag"),
getenv("yamap.list.item.nickname.skip")
);
if (!count($urls)) {
throw new \Exception("Target URL이 없습니다.");
}
//2. TargetPage : div.contents 가진 객체를 찾아서 첫번쨰 요소에서만 참조
$url = $isDebug ? getenv("yamap.url.test") : $links[0]["href"];
$crawler = $library->getCrawlerByDetailPage($url);
$url = $isDebug ? getenv("yamap.view.test.url") : $urls[0];
$crawler = $library->getCrawler($url, getenv("yamap.view.content.tag"));
//3. Image
$library->getImages($crawler);
$library->download($crawler, ["tag" => "img", "attr" => "src"]);
//4. Video
$library->getVideos($crawler);
$library->download($crawler, ["tag" => "video", "attr" => "src"]);
log_message("info", "완료되었습니다.");
return true;
} catch (\Exception $e) {

View File

@ -16,7 +16,7 @@ class Mangboard extends BaseController
try {
$userModel = new UserModel();
foreach ($userModel->getEntitys() as $entity) {
$entity = $this->setUserLevelByMangboardTrait($entity);
$entity = $this->setUserLevelByMangboard($entity);
log_message("debug", __FUNCTION__ . "=>[{$entity}] 회원님의 Level은 {$entity->getLevel()} 입니다.");
}
log_message("info", "완료되었습니다.");

View File

@ -1,79 +0,0 @@
<?php
namespace App\Libraries\MyCrawler;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Thin wrapper around Symfony's DomCrawler that extracts links, images and
 * videos from a parsed document and writes each hit to the debug log.
 */
class MyCrawlerLibrary
{
    /** Debug flag; this class only stores and returns it. */
    private $_debug = false;

    public function __construct() {}

    /** Stores the debug flag. */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }

    /** Returns the debug flag. */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * Builds a new Crawler from the given markup (or whatever else the
     * Crawler constructor accepts).
     */
    final public function createCrawler($html)
    {
        return new Crawler($html);
    }

    /**
     * Returns the inner HTML of the parsed document, or of the nodes matched
     * by $tag when a truthy selector is supplied.
     */
    final public function getInnerHTML(string $html, $tag = false)
    {
        $document = $this->createCrawler($html);
        if (!$tag) {
            return $document->html();
        }
        return $document->filter($tag)->html();
    }

    /**
     * Collects every node matching $options["tag"] as
     * ["anchor" => node text, "href" => value of $options["attr"]].
     *
     * @return array list of anchor/href pairs, in document order
     */
    public function getLinks(Crawler $crawler, array $options = ["tag" => "a", "attr" => "href"]): array
    {
        $attribute = $options["attr"];
        $found = $crawler->filter($options["tag"])->each(
            static function (Crawler $element) use ($attribute): array {
                return [
                    "anchor" => $element->text(),
                    "href" => $element->attr($attribute)
                ];
            }
        );
        // Logging happens only after the whole list has been collected.
        foreach ($found as $entry) {
            log_message("debug", "Link-> " . $entry['href']);
        }
        return $found;
    }

    /**
     * Collects every node matching $options["tag"] as
     * ["alt" => alt attribute, "src" => value of $options["attr"]].
     *
     * @return array list of alt/src pairs, in document order
     */
    public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        return $this->collectAltAndSource($crawler, $options, "Image-> ");
    }

    /**
     * Same extraction as getImages(), defaulting to <video> elements.
     *
     * @return array list of alt/src pairs, in document order
     */
    public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        return $this->collectAltAndSource($crawler, $options, "Video-> ");
    }

    /**
     * Shared implementation of getImages()/getVideos(): gathers alt/src pairs
     * and logs each "src" with the given prefix.
     */
    private function collectAltAndSource(Crawler $crawler, array $options, string $logPrefix): array
    {
        $attribute = $options["attr"];
        $found = $crawler->filter($options["tag"])->each(
            static function (Crawler $element) use ($attribute): array {
                return [
                    "alt" => $element->attr('alt'),
                    "src" => $element->attr($attribute)
                ];
            }
        );
        foreach ($found as $entry) {
            log_message("debug", $logPrefix . $entry['src']);
        }
        return $found;
    }
}

View File

@ -1,28 +0,0 @@
<?php
namespace App\Libraries\MyStorage;
/**
 * Holds the storage directory used for saved files, plus a debug flag.
 */
class MyStorageLibrary
{
    /** Debug flag; this class only stores and returns it. */
    private $_debug = false;

    /** Storage directory; starts at the "uploads" folder under WRITEPATH. */
    private $_path = WRITEPATH . "uploads";

    public function __construct() {}

    /** Stores the debug flag. */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }

    /** Returns the debug flag. */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * Appends $path as a sub-directory of the current storage path.
     * It does NOT replace the path, so repeated calls keep nesting deeper.
     */
    final public function setPath(string $path): void
    {
        $this->_path = $this->_path . DIRECTORY_SEPARATOR . $path;
    }

    /** Returns the current storage directory. */
    final public function getPath(): string
    {
        return $this->_path;
    }
}

View File

@ -1,78 +0,0 @@
<?php
namespace App\Libraries\MyWeb;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Client;
/**
 * Small HTTP helper bound to one host: lazily creates a Guzzle client and a
 * cookie jar, fetches page bodies and performs a form login.
 */
class MyWebLibrary
{
    /** Prefix prepended to every URL passed to getContent()/login(). */
    private $_host = "";
    /** Lazily created Guzzle client. */
    private $_client = null;
    /** Lazily created cookie jar, sent with the login request. */
    private $_cookieJar = null;
    /** Debug flag; this class only stores and returns it. */
    private $_debug = false;

    public function __construct(string $host)
    {
        $this->_host = $host;
    }

    /** Returns the host prefix given to the constructor. */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Returns the shared Guzzle client, creating it on first use.
     *
     * NOTE(review): 'verify' => false turns off TLS certificate verification
     * for every request made through this client.
     */
    final public function getClient(): Client
    {
        if ($this->_client !== null) {
            return $this->_client;
        }
        $this->_client = new Client(['verify' => false]);
        return $this->_client;
    }

    /** Returns the shared cookie jar, creating it on first use. */
    final public function getCookieJar()
    {
        if ($this->_cookieJar !== null) {
            return $this->_cookieJar;
        }
        $this->_cookieJar = new CookieJar();
        return $this->_cookieJar;
    }

    /** Stores the debug flag. */
    final public function setDebug(bool $debug): void
    {
        $this->_debug = $debug;
    }

    /** Returns the debug flag. */
    final public function getDebug(): bool
    {
        return $this->_debug;
    }

    /**
     * GETs host + $url and returns the response body.
     *
     * The body object is returned as-is and converted by the string return
     * type, which relies on this file not enabling strict_types.
     * NOTE(review): the cookie jar is not passed here, so cookies obtained
     * by login() are not sent with this request - confirm that is intended.
     */
    final public function getContent(string $url): string
    {
        $target = $this->getHost() . $url;
        $response = $this->getClient()->get($target);
        return $response->getBody();
    }

    /**
     * Login method: POSTs the credentials as a form to host + $url, storing
     * any cookies in the shared jar.
     *
     * @return bool true on HTTP 200; false on any other status or when the
     *              request throws (the exception is logged, not rethrown)
     */
    final public function login($url, $username, $password)
    {
        try {
            $client = $this->getClient();
            $response = $client->post($this->getHost() . $url, [
                'form_params' => [
                    'username' => $username,
                    'password' => $password,
                ],
                'cookies' => $this->getCookieJar(),
            ]);
            $status = $response->getStatusCode();
            if ($status == 200) {
                log_message("info", "로그인 성공!");
                return true;
            }
            log_message("info", "로그인 실패: " . $status);
            return false;
        } catch (\Exception $e) {
            log_message("error", "로그인 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }
}

View File

@ -2,47 +2,19 @@
namespace App\Libraries;
use App\Libraries\MyWeb\MyWebLibrary;
use App\Libraries\MyStorage\MyStorageLibrary;
use App\Libraries\MyCrawler\MyCrawlerLibrary;
use App\Traits\MyCrawlerTrait;
use App\Traits\MyWebTrait;
use App\Traits\MyStorage\MyStorageFileTrait;
use Symfony\Component\DomCrawler\Crawler;
class YamapLibrary
{
private $_web = null;
private $_storage = null;
private $_crawler = null;
use MyWebTrait, MyStorageFileTrait, MyCrawlerTrait;
private $_debug = false;
private $_host = null;
public function __construct(string $host)
{
$this->_host = $host;
}
public function getMyWeb(): MyWebLibrary
{
if ($this->_web === null) {
$this->_web = new MyWebLibrary($this->getHost());
$this->_web->setDebug($this->getDebug());
}
return $this->_web;
}
public function getMyStorage(): MyStorageLibrary
{
if ($this->_storage === null) {
$this->_storage = new MyStorageLibrary();
$this->_storage->setDebug($this->getDebug());
}
return $this->_storage;
}
public function getMyCrawler(): MyCrawlerLibrary
{
if ($this->_crawler === null) {
$this->_crawler = new MyCrawlerLibrary();
$this->_crawler->setDebug($this->getDebug());
}
return $this->_crawler;
$this->setHostByMyWeb($host);
$this->setPathByMyStorage(WRITEPATH . "uploads" . DIRECTORY_SEPARATOR . "Yamap");
}
final public function getDebug(): bool
@ -53,92 +25,48 @@ class YamapLibrary
{
$this->_debug = $debug;
}
final public function getHost(): string
public function getCrawler(string $url, string $tag): Crawler
{
return $this->_host;
log_message("debug", __FUNCTION__ . "-> " . $url . "\n");
$html = $this->getContentByMyWeb($url);
return $this->createByMyCrawler($html)->filter($tag);
}
public function getLinks(Crawler $crawler): array
{
public function getListURLs(
Crawler $crawler,
string $item_tag,
string $item_subject_tag,
string $item_nickname_tag,
string $item_nickname_skip,
array $urls = []
): array {
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
$bbs_items = $crawler->filter("div.bbs_item")->first()->siblings();
$links = [];
foreach ($bbs_items as $bbs_item) {
//bbs_item에서 span.g_nickname 객체를 찾아서 "관리자"가 작성한것이 아닌것을 확인 후
$this->getMyCrawler()->createCrawler($bbs_item)->filter("span.g_nickname")->each(function (Crawler $node) use (&$links, &$bbs_item) {
if ($node->text() != "관리자") {
//다시 bbs_item에서 a.list_subject 객체를 찾아서 Links에 추가한다.
foreach ($this->getMyCrawler()->getLinks($this->getMyCrawler()->createCrawler($bbs_item), ["tag" => ".list_subject", "attr" => "href"]) as $link) {
array_push($links, $link);
}
}
});
}
return $links;
}
public function getImages(Crawler $crawler, array $options = ["tag" => "img", "attr" => "src"]): array
{
$images = $this->getMyCrawler()->getImages($crawler, $options);
foreach ($images as $image) {
$this->download($image['src']);
}
return $images;
}
public function getVideos(Crawler $crawler, array $options = ["tag" => "video", "attr" => "src"]): array
{
$videos = $this->getMyCrawler()->getVideos($crawler, $options);
foreach ($videos as $video) {
$this->download($video['src']);
}
return $videos;
}
// 파일 다운로드 메서드
final public function download($url): bool
{
try {
$fileNames = explode('/', $url);
if (!is_array($fileNames) || !count($fileNames)) {
throw new \Exception("Download URL Error:" . $url);
}
if (!is_dir($this->getMyStorage()->getPath())) {
if (!mkdir($this->getMyStorage()->getPath())) {
throw new \Exception("Make Directory Error:" . $this->getMyStorage()->getPath());
$crawler->filter($item_tag)->each(
function (Crawler $node) use (
&$urls,
$item_subject_tag,
&$item_nickname_tag,
&$item_nickname_skip
): void {
//bbs_item에서 span.g_nickname 객체를 찾아서 작성자거 "관리자" 아닌지 확인 후 Return Bool
$nickname = $node->filter($item_nickname_tag)->text();
log_message("debug", $item_nickname_tag . ":" . $nickname);
if ($nickname != $item_nickname_skip) {
$options = ["tag" => $item_subject_tag, "attr" => "href"];
$urls = $this->getTagDatasByMyCrawler($node, $options);
}
}
$fullPath = $this->getMyStorage()->getPath() . DIRECTORY_SEPARATOR . array_pop($fileNames);
log_message("debug", "FullPath-> " . $fullPath);
if (!$this->getDebug()) {
$response = $this->getMyWeb()->getClient()->get($this->getMyWeb()->gethost() . $url, [
'cookies' => $this->getMyWeb()->getCookieJar(),
'sink' => $fullPath,
]);
if ($response->getStatusCode() == 200) {
log_message("info", "파일이 성공적으로 다운로드되었습니다!");
return true;
} else {
log_message("info", "파일 다운로드 실패: " . $response->getStatusCode());
return false;
}
}
return true;
} catch (\Exception $e) {
log_message("error", "파일 다운로드 중 오류 발생: " . $e->getMessage());
return false;
}
);
return $urls;
}
public function getCrawlerByMainPage(string $url): Crawler
public function download(Crawler $crawler, array $options): void
{
$html = $this->getMyWeb()->getContent($url);;
return $this->getMyCrawler()->createCrawler($html)->filter("div.bbs_list")->first();
}
public function getCrawlerByDetailPage(string $url): Crawler
{
log_message("debug", "Target-> " . $url);
$html = $this->getMyWeb()->getContent($url);;
return $this->getMyCrawler()->createCrawler($html)->filter("div.contents")->first();
log_message("debug", "download:{$options["tag"]},{$options["attr"]}");
$urls = $this->getTagDatasByMyCrawler($crawler, $options);
foreach ($urls as $url) {
$this->downloadByMyWeb($url, $this->getPathByMyStorage(), $this->getDebug());
}
}
}

View File

@ -8,7 +8,7 @@ use App\Models\UserModel;
trait MangboardTrait
{
private $_userModel = null;
public function getUserModel(): UserModel
protected function getUserModel(): UserModel
{
if (is_null($this->_userModel)) {
$this->_userModel = new UserModel();
@ -16,7 +16,7 @@ trait MangboardTrait
return $this->_userModel;
}
public function setUserPointByMangboardTrait(UserEntity $entity, int $point, $sign = '+'): UserEntity
protected function setUserPointByMangboard(UserEntity $entity, int $point, $sign = '+'): UserEntity
{
switch ($sign) {
case '-':
@ -36,7 +36,7 @@ trait MangboardTrait
return $this->setUserLevelByMangboardTrait($entity);
}
public function setUserLevelByMangboardTrait(UserEntity $entity): UserEntity
protected function setUserLevelByMangboard(UserEntity $entity): UserEntity
{
//Admin용 Level로는 변경불가
if ($entity->getLevel() == MANGBOARD['admin']['level']) {

View File

@ -0,0 +1,23 @@
<?php
namespace App\Traits;
use Symfony\Component\DomCrawler\Crawler;
/**
 * DomCrawler helpers shared by scraping libraries.
 */
trait MyCrawlerTrait
{
    /**
     * Builds a new Crawler from the given markup (or whatever else the
     * Crawler constructor accepts).
     */
    final protected function createByMyCrawler($html)
    {
        return new Crawler($html);
    }

    /**
     * Collects the value of one attribute from every node matching a selector.
     *
     * Nodes that lack the attribute (attr() yields null) or carry an empty
     * value are skipped, so the result holds only non-empty strings. Without
     * that filter a null entry reaches string-typed consumers such as
     * downloadByMyWeb() and raises a TypeError.
     *
     * @param Crawler $crawler  scope to search in
     * @param array   $options  "tag" = CSS selector, "attr" = attribute name;
     *                          missing keys fall back to "a" / "href"
     * @param array   $tagdatas values to prepend to the result
     * @return array  $tagdatas followed by the collected attribute values
     */
    public function getTagDatasByMyCrawler(Crawler $crawler, array $options = ["tag" => "a", "attr" => "href"], array $tagdatas = []): array
    {
        // Tolerate a partially filled $options array instead of failing on an undefined key.
        $options = $options + ["tag" => "a", "attr" => "href"];
        $selector = $options["tag"];
        $attribute = $options["attr"];
        $crawler->filter($selector)->each(
            function (Crawler $node) use (&$tagdatas, $selector, $attribute): void {
                $value = $node->attr($attribute);
                if ($value === null || trim($value) === "") {
                    log_message("debug", sprintf("getTagDatas-> %s: no usable %s attribute, skipped", $selector, $attribute));
                    return;
                }
                log_message("debug", sprintf("getTagDatas-> %s:%s", $selector, $value));
                $tagdatas[] = $value;
            }
        );
        return $tagdatas;
    }
}

View File

@ -0,0 +1,13 @@
<?php
namespace App\Traits\MyStorage;
/**
 * File-based storage: adds a "write content to a path" operation on top of
 * the path handling from MyStorageTrait.
 */
trait MyStorageFileTrait
{
    use MyStorageTrait;

    /**
     * Writes $content to $savePath, replacing any existing file.
     *
     * @param string $savePath full path of the file to write
     * @param mixed  $content  data accepted by file_put_contents()
     * @return bool  true when the write succeeded (including a zero-byte
     *               write), false when file_put_contents() reported failure
     */
    final protected function saveByMyStorage(string $savePath, $content): bool
    {
        // file_put_contents() returns the byte count or false. Compare against
        // false so a legitimate 0-byte write is not reported as a failure and
        // no int is pushed through the bool return type.
        return file_put_contents($savePath, $content) !== false;
    }
}

View File

@ -0,0 +1,17 @@
<?php
namespace App\Traits\MyStorage;
/**
 * Keeps track of the directory a storage-backed class writes into.
 */
trait MyStorageTrait
{
    /** Storage directory; an empty string until setPathByMyStorage() is called. */
    private $_path = "";

    /** Replaces the storage directory with $path. */
    final protected function setPathByMyStorage(string $path): void
    {
        $this->_path = $path;
    }

    /** Returns the storage directory last set, or "" if none was set. */
    final protected function getPathByMyStorage(): string
    {
        return $this->_path;
    }
}

118
app/Traits/MyWebTrait.php Normal file
View File

@ -0,0 +1,118 @@
<?php
namespace App\Traits;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Client;
/**
 * HTTP helpers bound to one host: lazily creates a Guzzle client and cookie
 * jar, fetches page bodies, performs a form login and downloads files.
 *
 * downloadByMyWeb() calls saveByMyStorage(), which this trait does not
 * define; the using class must also provide it (e.g. via MyStorageFileTrait).
 */
trait MyWebTrait
{
    /** Lazily created Guzzle client. */
    private $_client = null;
    /** Lazily created cookie jar shared by login and download requests. */
    private $_cookieJar = null;
    /** Prefix prepended to relative URLs; null until setHostByMyWeb() is called. */
    private $_host = null;

    /** Returns the host prefix; setHostByMyWeb() must have been called first. */
    final protected function getHostByMyWeb(): string
    {
        return $this->_host;
    }

    /** Sets the host prefix used for relative URLs. */
    final protected function setHostByMyWeb(string $host): void
    {
        $this->_host = $host;
    }

    /**
     * Returns the shared Guzzle client, creating it on first use.
     *
     * NOTE(review): 'verify' => false turns off TLS certificate verification
     * for every request made through this client.
     */
    final protected function getClientByMyWeb(): Client
    {
        if ($this->_client === null) {
            $this->_client = new Client(['verify' => false]);
        }
        return $this->_client;
    }

    /** Returns the shared cookie jar, creating it on first use. */
    final protected function getCookieJarByMyWeb()
    {
        if ($this->_cookieJar === null) {
            $this->_cookieJar = new CookieJar();
        }
        return $this->_cookieJar;
    }

    /**
     * Returns true when $url is absolute, i.e. starts with http:// or https://.
     *
     * Only the start of the string is checked, so a relative URL that merely
     * carries another URL in its query string (e.g. "/go?to=http://x") is
     * still treated as relative and gets the host prefix.
     */
    final protected function isContainsHttpOrHttpsByMyWeb($url)
    {
        return preg_match('#^https?://#i', (string) $url) === 1;
    }

    /**
     * GETs $url (prefixed with the host when it is not absolute) and returns
     * the response body.
     *
     * @param string $url     absolute URL, or a path relative to the host
     * @param array  $options Guzzle request options
     * @return string|false   body on HTTP 200, false on any other status.
     *                        Unless $options changes Guzzle's http_errors
     *                        default, 4xx/5xx responses surface as exceptions
     *                        instead of reaching the status check.
     */
    final protected function getContentByMyWeb(string $url, array $options = [])
    {
        // Relative URL: prepend the configured host.
        if (!$this->isContainsHttpOrHttpsByMyWeb($url)) {
            $url = $this->getHostByMyWeb() . $url;
        }
        $response = $this->getClientByMyWeb()->get($url, $options);
        if ($response->getStatusCode() != 200) {
            log_message("error", "{$url} 접속실패: " . $response->getStatusCode());
            return false;
        }
        return $response->getBody()->getContents();
    }

    /**
     * Login method: POSTs the credentials as a form to host + $url, storing
     * any cookies in the shared jar.
     *
     * @return bool true on HTTP 200; false on any other status or when the
     *              request throws (the exception is logged, not rethrown)
     */
    final protected function loginByMyWeb($url, $username, $password)
    {
        try {
            // Use this trait's own accessors; gethost()/getCookieJar() are not defined here.
            $response = $this->getClientByMyWeb()->post($this->getHostByMyWeb() . $url, [
                'form_params' => [
                    'username' => $username,
                    'password' => $password,
                ],
                'cookies' => $this->getCookieJarByMyWeb(),
            ]);
            if ($response->getStatusCode() == 200) {
                log_message("info", "로그인 성공!");
                return true;
            }
            log_message("info", "로그인 실패: " . $response->getStatusCode());
            return false;
        } catch (\Exception $e) {
            log_message("error", "로그인 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }

    /**
     * File download method: fetches $url and stores it inside $fullPath under
     * the last segment of the URL path.
     *
     * @param string $url      absolute URL, or a path relative to the host
     * @param string $fullPath target directory; created (recursively) if missing
     * @param bool   $debug    when true, only resolve and log the paths -
     *                         nothing is fetched or written
     * @return bool  true on success (or in debug mode), false on any failure;
     *               failures are logged, never thrown
     */
    final protected function downloadByMyWeb(string $url, string $fullPath, bool $debug = false): bool
    {
        try {
            log_message("debug", "donwload:URL-> " . $url);
            // Take the file name from the URL path only, so a query string or
            // fragment never ends up in the saved file name.
            $urlPath = (string) parse_url($url, PHP_URL_PATH);
            $lastSlash = strrpos($urlPath, '/');
            $fileName = $lastSlash === false ? $urlPath : (string) substr($urlPath, $lastSlash + 1);
            if ($fileName === "") {
                throw new \Exception("Download URL Error:" . $url);
            }
            // Recursive so nested targets such as uploads/Yamap work on a fresh
            // install; the trailing is_dir() tolerates a concurrent creation.
            if (!is_dir($fullPath) && !mkdir($fullPath, 0777, true) && !is_dir($fullPath)) {
                throw new \Exception("Make Directory Error:" . $fullPath);
            }
            $savePath = $fullPath . DIRECTORY_SEPARATOR . $fileName;
            log_message("debug", "download:SavePath-> " . $savePath);
            if ($debug) {
                return true;
            }
            $content = $this->getContentByMyWeb($url, [
                'cookies' => $this->getCookieJarByMyWeb(),
            ]);
            // Explicit checks: a body such as "0" is valid content, not a failure.
            if ($content === false || $content === "") {
                log_message("info", "{$fileName} 파일 다운로드 실패");
                return false;
            }
            if (!$this->saveByMyStorage($savePath, $content)) {
                throw new \Exception("File Save Error:" . $savePath);
            }
            log_message("info", "{$fileName} 파일이 다운로드되었습니다!");
            return true;
        } catch (\Exception $e) {
            log_message("error", "다운로드 중 오류 발생: " . $e->getMessage());
            return false;
        }
    }
}