311 lines
14 KiB
PHP
311 lines
14 KiB
PHP
<?php
|
|
|
|
namespace App\Libraries\MyCrawler;
|
|
|
|
use App\Libraries\MySocket\WebSocket;
|
|
use App\Libraries\MyMangboard\Storage;
|
|
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
use App\Traits\FileTrait;
|
|
use App\Models\Mangboard\BoardsModel;
|
|
use App\Models\Mangboard\BoardModel;
|
|
use App\Libraries\CommonLibrary;
|
|
use App\Entities\Mangboard\UserEntity;
|
|
use App\Entities\Mangboard\BoardsEntity;
|
|
use App\Entities\Mangboard\BoardEntity;
|
|
|
|
abstract class MyCrawler extends CommonLibrary
|
|
{
|
|
use FileTrait;
|
|
/** Host handed to the WebSocket client when getMySocket() first builds it. */
private string $_host = "";

/** Board name handed to the Storage helper when getMyStorage() first builds it. */
private string $_board_name = "";

/** User supplied to the constructor and forwarded to Storage; null only until construction. */
private ?UserEntity $_user_entity = null;

/** Cached WebSocket client; created on first call to getMySocket(). */
private ?WebSocket $_mySocket = null;

/** Cached Storage helper; created on first call to getMyStorage(). */
private ?Storage $_myStorage = null;

/** Cached BoardModel; created on first call to getBoardModel(). */
private ?BoardModel $_board_model = null;

/**
 * Not referenced by any method visible in this class, so its intended type
 * cannot be determined here; left untyped on purpose.
 */
private $_user_model = null;

/** Cached BoardsEntity; loaded on first call to getBoardsEntity(). */
private ?BoardsEntity $_boards_entity = null;
|
|
/**
 * Records the crawl target and the acting user.
 *
 * Nothing else is built here: the socket, storage, models and entities are
 * created on demand by their getters.
 *
 * @param string     $host        Host later passed to the WebSocket client.
 * @param string     $board_name  Board name later passed to the Storage helper.
 * @param UserEntity $user_entity User later passed to the Storage helper.
 */
protected function __construct(string $host, string $board_name, UserEntity $user_entity)
{
    parent::__construct();

    [$this->_host, $this->_board_name, $this->_user_entity] = [$host, $board_name, $user_entity];
}
|
|
/**
 * Resolves the detail page for one list entry; implemented by each concrete crawler.
 *
 * Callers in this class destructure the result as [$selector, $listInfo]:
 * element 0 is used as a Crawler over the detail content (html() is called on
 * it and it is searched for media tags), and element 1 replaces the caller's
 * $listInfo.
 *
 * @param array $listInfo Data describing one list entry.
 * @return array Two-element list as described above.
 */
abstract protected function getDetailSelector(array $listInfo): array;
|
|
//-----------------------필수항목-------------------//
|
|
/**
 * Returns the WebSocket client for the configured host, creating it on first use.
 *
 * @return WebSocket The same instance on every call.
 */
final protected function getMySocket(): WebSocket
{
    $this->_mySocket ??= new WebSocket($this->_host);

    return $this->_mySocket;
}
|
|
/**
 * Returns the Storage helper for the configured board and user, creating it on first use.
 *
 * @return Storage The same instance on every call.
 */
final protected function getMyStorage(): Storage
{
    $this->_myStorage ??= new Storage($this->_board_name, $this->_user_entity);

    return $this->_myStorage;
}
|
|
/**
 * Returns the BoardsEntity for the storage's board name, loading it once and caching it.
 *
 * @return BoardsEntity The cached entity.
 * @throws \Exception When BoardsModel has no entity for the board name.
 */
final protected function getBoardsEntity(): BoardsEntity
{
    // Fast path: already looked up.
    if ($this->_boards_entity !== null) {
        return $this->_boards_entity;
    }

    $lookup = new BoardsModel();
    $this->_boards_entity = $lookup->getEntityByID($this->getMyStorage()->getBoardName());

    if ($this->_boards_entity === null) {
        throw new \Exception(__FUNCTION__ . "=> {$this->getMyStorage()->getBoardName()}에 해당 Board 정보가 존재하지 않습니다.");
    }

    return $this->_boards_entity;
}
|
|
/**
 * Returns the BoardModel bound to "mb_" + the storage's board name, creating it on first use.
 *
 * @return BoardModel The same instance on every call.
 */
final protected function getBoardModel(): BoardModel
{
    $this->_board_model ??= new BoardModel("mb_" . $this->getMyStorage()->getBoardName());

    return $this->_board_model;
}
|
|
/**
 * Parses $content and returns the nodes matching the CSS selector $tag.
 *
 * The selector is evaluated exactly once. In debug mode the selector and the
 * HTML of the first matched node are logged; an empty selection is logged as
 * an empty string so that debug logging never throws where the non-debug
 * path would have returned normally.
 *
 * @param string $content Markup to parse.
 * @param string $tag     CSS selector passed to Crawler::filter().
 * @return Crawler The filtered node list (possibly empty).
 */
final protected function getSelector(string $content, string $tag): Crawler
{
    $crawler = new Crawler($content);
    if ($this->isDebug) {
        log_message("debug", __FUNCTION__ . "=> " . $tag);
    }

    $selected = $crawler->filter($tag);

    if ($this->isDebug) {
        log_message("debug", sprintf(
            "\n------------%s HTML-------------\n%s\n-----------------------------------------------------\n",
            __FUNCTION__,
            // html() throws on an empty node list, so guard it.
            $selected->count() > 0 ? $selected->html() : ""
        ));
    }

    return $selected;
}
|
|
/**
 * Returns the part of $url that precedes the first "?" (i.e. drops the query string).
 *
 * When there is nothing before the "?" (empty input, or a URL that starts
 * with "?") an empty string is returned. The method is declared to return
 * string, so returning null in that case would raise a TypeError.
 *
 * @param string $url URL as found in the page.
 * @return string URL without its query string; "" when no such part exists.
 */
protected function changeURLByCrawler(string $url): string
{
    return preg_match('/^[^?]+/', $url, $matches) === 1 ? $matches[0] : "";
}
|
|
/**
 * Reads the media URL from a node.
 *
 * For every tag the URL is taken from attribute $attr on the node itself,
 * e.g. <video src="test.mp4"></video> or <video data-src="test.mp4"></video>.
 * For "video" only, when that attribute is missing, the "src" of the first
 * child element is used instead, e.g. <video><source src="test.mp4"></video>.
 *
 * A <video> without child elements yields null rather than an exception from
 * reading an attribute off an empty node list, so the caller's "no URL"
 * handling can run.
 *
 * @param Crawler $node      Node for a single media element.
 * @param string  $media_tag Tag name being processed ("img", "video", ...).
 * @param string  $attr      Attribute expected to hold the URL.
 * @return null|string The URL, or null when none could be found.
 */
protected function getUrlByMediaType(Crawler $node, string $media_tag, string $attr): null|string
{
    $url = $node->attr($attr);

    if ($url === null && $media_tag === 'video') {
        $children = $node->children();
        if ($children->count() > 0) {
            $url = $children->attr("src");
        }
    }

    return $url;
}
|
|
/**
 * Collects the URLs of every $media_tag element under $selector.
 *
 * Each URL has its query string removed and is then passed through
 * changeURLByCrawler(). Nodes without a usable URL are not added; their
 * attributes are written to the debug log instead.
 *
 * $media_tag and $attr are captured by value and the attribute dump uses its
 * own loop variable. Capturing $attr by reference and reusing it as the loop
 * variable would replace the attribute name with a DOMAttr object after the
 * first unresolved node, making the next getUrlByMediaType() call fail.
 *
 * @param Crawler $selector  Detail content to search.
 * @param string  $media_tag Tag name used both as CSS selector and as result key.
 * @param string  $attr      Attribute expected to hold the URL.
 * @param array   $urls      Result of a previous call to extend; the $media_tag key is reset.
 * @return array $urls with $urls[$media_tag] set to the list of URLs found.
 */
private function getUrlsByMediaType(Crawler $selector, string $media_tag, string $attr, array $urls = []): array
{
    log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_tag} 작업시작--------");
    $urls[$media_tag] = [];

    // Inside the closure __FUNCTION__ names the closure, not this method.
    $caller = __FUNCTION__;

    $selector->filter($media_tag)->each(
        function (Crawler $node) use ($media_tag, $attr, $caller, &$urls): void {
            $url = $this->getUrlByMediaType($node, $media_tag, $attr);
            if ($url !== null && preg_match('/^[^?]+/', $url, $matches)) {
                $urls[$media_tag][] = $this->changeURLByCrawler($matches[0]);
                return;
            }

            log_message("debug", $caller . "-> {$media_tag}:{$attr}\n");
            // The full attribute list is only available on the underlying DOM node.
            $domNode = $node->getNode(0);
            if ($domNode !== null && $domNode->hasAttributes()) {
                foreach ($domNode->attributes as $attribute) {
                    log_message("debug", "{$attribute->nodeName} = {$attribute->nodeValue}");
                }
            }
        }
    );

    log_message("notice", "-----------" . __FUNCTION__ . "=> {$media_tag} 작업완료--------");
    return $urls;
}
|
|
/**
 * Saves one downloaded file through a private copy of the storage helper.
 *
 * The shared Storage instance is cloned, so the origin name, content, media
 * tag and sequence set here stay on the copy.
 *
 * @param int    $file_sequence Position of the file within the post.
 * @param string $media_tag     Tag the file came from.
 * @param string $file_name     Original file name.
 * @param string $content       Raw file content.
 * @return mixed Whatever the storage's save() returns.
 */
private function media_save(int $file_sequence, string $media_tag, string $file_name, string $content): mixed
{
    log_message("debug", sprintf("%s 원본파일 %s 작업 시작", __FUNCTION__, $file_name));

    $copy = clone $this->getMyStorage();
    $copy->setOriginName($file_name);
    $copy->setOriginContent($content);
    $copy->setOriginMediaTag($media_tag);
    $copy->setOriginSequence($file_sequence);

    $saved = $copy->save();

    return $saved;
}
|
|
/**
 * Downloads one image or video referenced by the view page.
 *
 * The file name is the last "/"-separated segment of $url and its extension
 * is the text after the last ".". A URL with no file name (for example one
 * ending in "/") and a name with no extension are both rejected before any
 * download is attempted.
 *
 * @param string $media_tag Tag the URL came from; the extension must be valid for it.
 * @param string $url       URL of the media file, without query string.
 * @return array Two-element list: [file name, downloaded content].
 * @throws \Exception When the URL has no file name or the extension does not fit $media_tag.
 */
private function media_download(string $media_tag, string $url): array
{
    // explode() always returns at least one element, so test the name itself.
    $segments = explode('/', $url);
    $file_name = array_pop($segments);
    if ($file_name === "") {
        throw new \Exception("URL이 파일명 형식이 아닙니다 : " . $this->getMySocket()->getHost() . $url);
    }

    $dot = strrpos($file_name, ".");
    $file_ext = $dot === false ? "" : substr($file_name, $dot + 1);
    if ($file_ext === "" || !$this->isFileType_FileTrait($file_ext, $media_tag)) {
        throw new \Exception("파일명 형식이 {$media_tag}가 아닙니다");
    }

    $content = $this->getMySocket()->getContent($url);
    log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");

    return [$file_name, $content];
}
|
|
/**
 * Downloads and saves every media URL of one detail page.
 *
 * A failure on one file is logged as a warning and the remaining files are
 * still processed. The sequence number runs across all media types, so the
 * "total" in the progress logs is the overall number of URLs; a per-type
 * total would let the sequence exceed it.
 *
 * @param array $media_urls Map of media tag => list of URLs.
 * @return array The values returned by media_save() for each file that succeeded.
 */
private function media_process(array $media_urls): array
{
    $total = array_sum(array_map('count', $media_urls));
    $file_sequence = 1;
    $storages = []; // Fresh for every detail page; consumed when the board post is created.

    foreach ($media_urls as $media_tag => $urls) {
        foreach ($urls as $url) {
            log_message("notice", __FUNCTION__ . " {$file_sequence}번째/총:{$total} MediaType->{$media_tag} 작업 시작");
            try {
                [$file_name, $content] = $this->media_download($media_tag, $url);
                $storage = $this->media_save($file_sequence, $media_tag, $file_name, $content);
                log_message("debug", __FUNCTION__ . " {$file_sequence}번째/총:{$total} 결과=>" . $storage->getOriginName());
                $storages[] = $storage;
            } catch (\Exception $e) {
                log_message("warning", sprintf(
                    "\n---%s MediaType->%s %d번째/총:%d 오류---\n%s\n-----------------------------------------\n",
                    __FUNCTION__,
                    $media_tag,
                    $file_sequence,
                    $total,
                    $e->getMessage()
                ));
            }
            log_message("notice", __FUNCTION__ . " {$file_sequence}번째/총:{$total} MediaType->{$media_tag} 작업 완료");
            $file_sequence++;
        }
    }

    return $storages;
}
|
|
|
|
/**
 * Registers one board post.
 *
 * Builds the form data from the list entry, the storage's user and the
 * boards entity, appends the HTML tag of every saved media file to the
 * content, and uses the first file's path as the post image when none was
 * supplied.
 *
 * Robustness over a straight field copy:
 * - 'image_path' and 'content' are defaulted when a caller passes $formDatas
 *   without them;
 * - an unparsable or missing date falls back to the current time, because
 *   strtotime() returning false would otherwise be formatted as the 1970 epoch;
 * - missing 'nickname' and 'hit' entries are treated as "" and 0.
 *
 * @param int   $cnt       Position of the post in the current run (used in messages).
 * @param array $listInfo  List entry; 'title' is required, 'nickname', 'hit' and 'date' are read.
 * @param array $storages  Saved media files of the post.
 * @param array $formDatas Pre-filled form data ('image_path', 'content').
 * @return BoardEntity The entity returned by the board model's create().
 * @throws \Exception When the resulting content is empty.
 */
private function create_board(int $cnt, array $listInfo, array $storages, array $formDatas = ['image_path' => "", 'content' => ""]): BoardEntity
{
    $formDatas += ['image_path' => "", 'content' => ""];

    $nickname = $listInfo["nickname"] ?? "";
    $timestamp = strtotime((string) ($listInfo['date'] ?? ""));

    $formDatas[BoardModel::TITLE] = $listInfo["title"];
    $formDatas['user_pid'] = $this->getMyStorage()->getUserEntity()->getPK();
    $formDatas['user_id'] = $this->getMyStorage()->getUserEntity()->getID();
    $formDatas['user_name'] = $nickname != "" ? $nickname : $this->getMyStorage()->getUserEntity()->getTitle();
    $formDatas['level'] = $this->getBoardsEntity()->getListLevel();
    $formDatas['hit'] = intval($listInfo['hit'] ?? 0);
    $formDatas['reg_date'] = date("Y-m-d H:i:s", $timestamp !== false ? $timestamp : time());
    $formDatas['data_type'] = "html";
    $formDatas['editor_type'] = "S";

    // Put the media information into the post.
    foreach ($storages as $storage) {
        if ($formDatas['image_path'] == "") {
            $formDatas['image_path'] = $storage->getBasePath() . DIRECTORY_SEPARATOR . $storage->getPath() . DIRECTORY_SEPARATOR . $storage->getOriginName();
        }
        $formDatas['content'] .= $storage->getHTMLTag();
    }

    // Register on the Mangboard board.
    if ($formDatas['content'] == "") {
        throw new \Exception(sprintf(
            "%s=>%s번째 %s 내용이 없어 => %s 등록 안함 : storage->%s",
            __FUNCTION__,
            $cnt,
            $listInfo["title"],
            $this->getBoardModel()->getTable(),
            count($storages)
        ));
    }

    $board_entity = $this->getBoardModel()->create($formDatas);
    log_message("notice", sprintf(
        "%s=>%s번째 %s => %s 등록 완료 : storage->%s",
        __FUNCTION__,
        $cnt,
        $listInfo["title"],
        $this->getBoardModel()->getTable(),
        count($storages)
    ));

    return $board_entity;
}
|
|
/**
 * Registers every saved media file against the created board post.
 *
 * Each storage's create() is called with the boards entity, the post and the
 * board table name (per the original note this covers the file DB record and
 * thumbnail creation — NOTE(review): confirm against Storage::create()).
 * A failure on one file is logged and the remaining files are still
 * processed. Failures are logged at "warning", matching how the other
 * methods in this class report per-item errors.
 *
 * @param BoardEntity $board_entity Post the files belong to.
 * @param array       $storages     Saved media files of the post.
 */
private function create_storages(BoardEntity $board_entity, array $storages): void
{
    foreach ($storages as $storage) {
        try {
            $storage->create($this->getBoardsEntity(), $board_entity, $this->getBoardModel()->getTable());
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s -> %s 게시물의 %s번째:%s 파일 등록 오류---\n%s\n--------------------------------\n",
                __FUNCTION__,
                $board_entity->getTitle(),
                $storage->getOriginSequence(),
                $storage->getOriginName(),
                $e->getMessage()
            ));
        }
    }
}
|
|
/**
 * Copy mode: registers the detail page's HTML as the post content, without downloading media.
 *
 * @param int   $cnt      Position of the post in the current run.
 * @param array $listInfo List entry being processed.
 * @return array The list entry as returned by getDetailSelector().
 * @throws \Exception Propagated from create_board(), e.g. when the content is empty.
 */
private function detail_copy_process(int $cnt, array $listInfo): array
{
    [$detail, $listInfo] = $this->getDetailSelector($listInfo);

    // Register the board post with the page HTML and no media files.
    $this->create_board($cnt, $listInfo, [], [
        'image_path' => "",
        'content' => $detail->html(),
    ]);

    log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");

    return $listInfo;
}
|
|
/**
 * Download mode: fetches the page's images and videos, then registers the post and its files.
 *
 * In debug mode nothing is downloaded or registered; the list entry and the
 * collected URLs are reported through an exception instead.
 *
 * @param int   $cnt      Position of the post in the current run.
 * @param array $listInfo List entry being processed.
 * @return array The list entry as returned by getDetailSelector().
 * @throws \Exception In debug mode, when no file could be saved, or from create_board().
 */
private function detail_download_process(int $cnt, array $listInfo): array
{
    [$detail, $listInfo] = $this->getDetailSelector($listInfo);

    $found = $this->getUrlsByMediaType($detail, "img", "src");
    $found = $this->getUrlsByMediaType($detail, "video", "src", $found);

    if ($this->isDebug) {
        throw new \Exception(sprintf(
            "\n--------------%s Debug--------------\n%s%s\n---------------------------------------\n",
            __FUNCTION__,
            var_export($listInfo, true),
            var_export($found, true)
        ));
    }

    // Take the image/video source URLs and actually download them.
    $saved = $this->media_process($found);
    if (count($saved) === 0) {
        throw new \Exception("등록할 자료가 없습니다.");
    }

    // Register the board post, then its files.
    $post = $this->create_board($cnt, $listInfo, $saved);
    $this->create_storages($post, $saved);

    log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");

    return $listInfo;
}
|
|
/**
 * Processes list entries one by one, in copy or download mode depending on isCopy.
 *
 * A limit of 0 processes every entry; otherwise the smaller of the limit and
 * the number of entries is processed. An exception on one entry is logged as
 * a warning and the run continues with the next entry.
 *
 * @param int   $max_limit Maximum number of entries to process; 0 means all.
 * @param array $listInfos List entries; each is read for its 'nickname' in the log lines.
 */
protected function list_process(int $max_limit, array $listInfos): void
{
    $total = count($listInfos);
    $limit = ($max_limit && $total > $max_limit) ? $max_limit : $total;

    $i = 1;
    foreach ($listInfos as $listInfo) {
        if ($i > $limit) {
            break;
        }

        log_message("notice", __FUNCTION__ . " 게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업시작");
        try {
            // The entry (title, author, time, ...) may be changed while the
            // detail page is processed, so take the returned copy back.
            $listInfo = $this->isCopy
                ? $this->detail_copy_process($i, $listInfo)
                : $this->detail_download_process($i, $listInfo);
        } catch (\Exception $e) {
            log_message("warning", sprintf(
                "\n---%s {$i}번째/총:{$total} 오류---\n%s\n-----------------------------------------\n",
                __FUNCTION__,
                $e->getMessage()
            ));
        }
        log_message("notice", __FUNCTION__ . " 게시물 {$i}번째/총:{$total} {$listInfo["nickname"]} 작업완료.");

        $i++;
    }

    log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
}
|
|
}
|