Automation init...3
This commit is contained in:
parent
4433192604
commit
3341cdecd2
@ -28,7 +28,7 @@ class CrawlerController extends CommonController
|
|||||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||||
$crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity);
|
$crawler = new YamapCrawler(getenv('yamap.host.url'), $category, $user_entity);
|
||||||
$crawler->setDebug($debug === "true" ? true : false);
|
$crawler->setDebug($debug === "true" ? true : false);
|
||||||
$crawler->execute();
|
$crawler->execute(intval(getenv("yamap.list.max_limit")));
|
||||||
return "완료되었습니다.";
|
return "완료되었습니다.";
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
log_message("error", $e->getMessage());
|
log_message("error", $e->getMessage());
|
||||||
@ -45,7 +45,7 @@ class CrawlerController extends CommonController
|
|||||||
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
//2. 필요한 로그인한 사용자정보,Socket,Storage 정의후 Crawler에게 전달.
|
||||||
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity);
|
$crawler = new YamoonCrawler(getenv("yamoon.host.url"), $category, $user_entity);
|
||||||
$crawler->setDebug($debug === "true" ? true : false);
|
$crawler->setDebug($debug === "true" ? true : false);
|
||||||
$crawler->execute();
|
$crawler->execute(intval(getenv("yamap.list.max_limit")));
|
||||||
return "완료되었습니다.";
|
return "완료되었습니다.";
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
log_message("error", $e->getMessage());
|
log_message("error", $e->getMessage());
|
||||||
|
|||||||
@ -17,7 +17,9 @@ abstract class MyCrawler extends CommonLibrary
|
|||||||
$this->_mySocket = $mySocket;
|
$this->_mySocket = $mySocket;
|
||||||
}
|
}
|
||||||
abstract protected function getMyStorage();
|
abstract protected function getMyStorage();
|
||||||
abstract public function execute(): void;
|
abstract protected function list_page(): array;
|
||||||
|
abstract protected function detail_page(array $listInfo): array;
|
||||||
|
abstract protected function backend_process(int $i, array $listInfo, array $storages);
|
||||||
final protected function getMySocket()
|
final protected function getMySocket()
|
||||||
{
|
{
|
||||||
if ($this->_mySocket === null) {
|
if ($this->_mySocket === null) {
|
||||||
@ -41,15 +43,15 @@ abstract class MyCrawler extends CommonLibrary
|
|||||||
}
|
}
|
||||||
|
|
||||||
//--------미디어 URL관련------
|
//--------미디어 URL관련------
|
||||||
private function getMediaUrlsByMediaType(string $mediaType, Crawler $selector, array $options, array $urls = []): array
|
private function getMediaUrlsByMediaType(string $media_type, Crawler $selector, array $options, array $urls = []): array
|
||||||
{
|
{
|
||||||
$urls[$mediaType] = [];
|
$urls[$media_type] = [];
|
||||||
$selector->filter($options["tag"])->each(
|
$selector->filter($options["tag"])->each(
|
||||||
function (Crawler $node) use (&$mediaType, &$options, &$urls): void {
|
function (Crawler $node) use (&$media_type, &$options, &$urls): void {
|
||||||
$url = $node->attr($options["attr"]);
|
$url = $node->attr($options["attr"]);
|
||||||
log_message("debug", __FUNCTION__ . "-> {$mediaType}[{$options["attr"]}]:{$url}");
|
log_message("debug", __FUNCTION__ . "-> {$media_type}[{$options["attr"]}]:{$url}");
|
||||||
if (!is_null($url)) {
|
if (!is_null($url)) {
|
||||||
$urls[$mediaType][] = $url;
|
$urls[$media_type][] = $url;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
@ -68,17 +70,17 @@ abstract class MyCrawler extends CommonLibrary
|
|||||||
}
|
}
|
||||||
|
|
||||||
//--------미디어 관련-------
|
//--------미디어 관련-------
|
||||||
private function mediaSave(int $file_sequence, string $mediaType, string $file_name, string $content): void
|
private function media_save(int $file_sequence, string $media_type, string $file_name, string $content): void
|
||||||
{
|
{
|
||||||
log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");
|
log_message("debug", __FUNCTION__ . " 원본파일 {$file_name} 작업 시작");
|
||||||
$this->getMyStorage()->setOriginName($file_name);
|
$this->getMyStorage()->setOriginName($file_name);
|
||||||
$this->getMyStorage()->setOriginContent($content);
|
$this->getMyStorage()->setOriginContent($content);
|
||||||
$this->getMyStorage()->setOriginType($mediaType);
|
$this->getMyStorage()->setOriginType($media_type);
|
||||||
$this->getMyStorage()->setOriginSequence($file_sequence);
|
$this->getMyStorage()->setOriginSequence($file_sequence);
|
||||||
$this->_storages[] = $this->getMyStorage()->save();
|
$this->_storages[] = $this->getMyStorage()->save();
|
||||||
}
|
}
|
||||||
//Yamap ViewPage의 이미지나영상데이터가 있으면 Dodownload 한다.
|
//Yamap ViewPage의 이미지나영상데이터가 있으면 Dodownload 한다.
|
||||||
private function mediaDownload(string $mediaType, string $url): array
|
private function media_download(string $media_type, string $url): array
|
||||||
{
|
{
|
||||||
$file_names = explode('/', $url);
|
$file_names = explode('/', $url);
|
||||||
if (!is_array($file_names) || !count($file_names)) {
|
if (!is_array($file_names) || !count($file_names)) {
|
||||||
@ -87,33 +89,33 @@ abstract class MyCrawler extends CommonLibrary
|
|||||||
$file_name = array_pop($file_names);
|
$file_name = array_pop($file_names);
|
||||||
$temps = explode(".", $file_name);
|
$temps = explode(".", $file_name);
|
||||||
$file_ext = array_pop($temps);
|
$file_ext = array_pop($temps);
|
||||||
if (!$this->isFileType_FileTrait($file_ext, $mediaType)) {
|
if (!$this->isFileType_FileTrait($file_ext, $media_type)) {
|
||||||
throw new \Exception("파일명 형식이 {$mediaType}가 아닙니다");
|
throw new \Exception("파일명 형식이 {$media_type}가 아닙니다");
|
||||||
}
|
}
|
||||||
$content = $this->getMySocket()->getContent($url);
|
$content = $this->getMySocket()->getContent($url);
|
||||||
log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");
|
log_message("notice", "{$file_name} 파일이 다운로드되었습니다!");
|
||||||
return array($file_name, $content);
|
return array($file_name, $content);
|
||||||
}
|
}
|
||||||
final protected function mediaProcess(array $urls): array
|
final protected function media_process(array $media_urls): array
|
||||||
{
|
{
|
||||||
$file_sequence = 1;
|
$file_sequence = 1;
|
||||||
$this->_storages = []; //CreateBoard에서 사용을 위해 DetailPage마다 초기화
|
$this->_storages = []; //CreateBoard에서 사용을 위해 DetailPage마다 초기화
|
||||||
// log_message("debug", var_export($urls, true));
|
// log_message("debug", var_export($urls, true));
|
||||||
foreach ($urls as $mediaType => $media_urls) {
|
foreach ($media_urls as $media_type => $urls) {
|
||||||
foreach ($media_urls as $url) {
|
foreach ($urls as $url) {
|
||||||
try {
|
try {
|
||||||
if ($url === null) {
|
if ($url === null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
list($file_name, $content) = $this->mediaDownload($mediaType, $url);
|
list($file_name, $content) = $this->media_download($media_type, $url);
|
||||||
$this->mediaSave($file_sequence, $mediaType, $file_name, $content);
|
$this->media_save($file_sequence, $media_type, $file_name, $content);
|
||||||
$file_sequence++;
|
$file_sequence++;
|
||||||
log_message("notice", __FUNCTION__ . " OriginType->{$mediaType} 작업 완료");
|
log_message("notice", __FUNCTION__ . " OriginType->{$media_type} 작업 완료");
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
log_message("warning", sprintf(
|
log_message("warning", sprintf(
|
||||||
"\n---%s,OriginType->%s 오류---\n%s\n-----------------------------------------\n",
|
"\n---%s mediaType->%s 오류---\n%s\n-----------------------------------------\n",
|
||||||
__FUNCTION__,
|
__FUNCTION__,
|
||||||
$mediaType,
|
$media_type,
|
||||||
$e->getMessage()
|
$e->getMessage()
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
@ -124,4 +126,36 @@ abstract class MyCrawler extends CommonLibrary
|
|||||||
}
|
}
|
||||||
return $this->_storages;
|
return $this->_storages;
|
||||||
}
|
}
|
||||||
|
protected function main_process(int $max_limit, array $listInfos): void
|
||||||
|
{
|
||||||
|
//Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다.
|
||||||
|
if ($max_limit) {
|
||||||
|
$max_limit = count($listInfos) <= $max_limit ? count($listInfos) : $max_limit;
|
||||||
|
} else {
|
||||||
|
$max_limit = count($listInfos);
|
||||||
|
}
|
||||||
|
$total = count($listInfos);
|
||||||
|
$i = 1;
|
||||||
|
foreach ($listInfos as $listInfo) {
|
||||||
|
if ($i <= $max_limit) {
|
||||||
|
try {
|
||||||
|
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
|
||||||
|
//listInfo는 title,작성자,작성시간등등의 정보를 가지고 있어 detail_page 처리 안에서 바뀔 수 있으므로 다시 반환 받는다.
|
||||||
|
list($listInfo, $media_urls) = $this->detail_page($listInfo);
|
||||||
|
//Image 나 Video 소스들의 url을 가져와서 실제 다운받는 처리
|
||||||
|
$this->media_process($media_urls);
|
||||||
|
//File DB 및 Board DB 등록작업등
|
||||||
|
$this->backend_process($i, $listInfo, $this->_storages);
|
||||||
|
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
|
||||||
|
$i++;
|
||||||
|
} catch (\Exception $e) {
|
||||||
|
log_message("warning", sprintf(
|
||||||
|
"\n---%s 오류---\n%s\n-----------------------------------------\n",
|
||||||
|
__FUNCTION__,
|
||||||
|
$e->getMessage()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -55,13 +55,13 @@ class YamapCrawler extends MyCrawler
|
|||||||
// </div>
|
// </div>
|
||||||
// <div id="freesubframe"></div>
|
// <div id="freesubframe"></div>
|
||||||
// </div>
|
// </div>
|
||||||
private function detailPage(array $listInfo): array
|
protected function detail_page(array $listInfo): array
|
||||||
{
|
{
|
||||||
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
$response = $this->getMySocket()->getContent($listInfo['detail_url']);
|
||||||
$tag = getenv("yamap.view.content.tag");
|
$tag = getenv("yamap.view.content.tag");
|
||||||
return $this->getMediaUrls($response, $tag, $listInfo);
|
return $this->getMediaUrls($response, $tag, $listInfo);
|
||||||
}
|
}
|
||||||
private function listPage(): array
|
protected function list_page(): array
|
||||||
{
|
{
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
return [
|
return [
|
||||||
@ -98,35 +98,17 @@ class YamapCrawler extends MyCrawler
|
|||||||
log_message("notice", __FUNCTION__ . " 작업 완료");
|
log_message("notice", __FUNCTION__ . " 작업 완료");
|
||||||
return $listInfos;
|
return $listInfos;
|
||||||
}
|
}
|
||||||
public function execute(): void
|
protected function backend_process(int $i, array $listInfo, array $storages)
|
||||||
{
|
{
|
||||||
$listInfos = $this->listPage();
|
//File DB 및 Board DB 등록작업
|
||||||
//Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다.
|
$board_entity = $this->getMyStorage()->getBoard()->createByCrawler($i, $listInfo, $storages);
|
||||||
$max_limit = intval(getenv("yamap.list.max_limit"));
|
$this->getMyStorage()->getFile()->createByCrawler($board_entity, $storages);
|
||||||
if ($max_limit) {
|
$this->getMyStorage()->getImage()->createByCrawler($board_entity, $storages);
|
||||||
$max_limit = count($listInfos) <= $max_limit ? count($listInfos) : $max_limit;
|
}
|
||||||
} else {
|
public function execute(int $max_limit): void
|
||||||
$max_limit = count($listInfos);
|
{
|
||||||
}
|
$listInfos = $this->list_page();
|
||||||
$total = count($listInfos);
|
$this->main_process($max_limit, $listInfos);
|
||||||
$i = 1;
|
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||||
foreach ($listInfos as $listInfo) {
|
|
||||||
if ($i <= $max_limit) {
|
|
||||||
try {
|
|
||||||
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
|
|
||||||
list($listInfo, $urls) = $this->detailPage($listInfo);
|
|
||||||
$this->mediaProcess($urls);
|
|
||||||
//File DB 및 Board DB 등록작업
|
|
||||||
$board_entity = $this->getMyStorage()->getBoard()->createByCrawler($i, $listInfo, $this->_storages);
|
|
||||||
$this->getMyStorage()->getFile()->createByCrawler($board_entity, $this->_storages);
|
|
||||||
$this->getMyStorage()->getImage()->createByCrawler($board_entity, $this->_storages);
|
|
||||||
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
|
|
||||||
$i++;
|
|
||||||
} catch (\Exception $e) {
|
|
||||||
log_message("debug", $e->getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다.");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -25,7 +25,7 @@ class YamoonCrawler extends MyCrawler
|
|||||||
}
|
}
|
||||||
return $this->_myStorage;
|
return $this->_myStorage;
|
||||||
}
|
}
|
||||||
private function detailPage(array $listInfo): array
|
protected function detail_page(array $listInfo): array
|
||||||
{
|
{
|
||||||
$response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']);
|
$response = $this->getMySocket()->getContent("/newboard/yamoonboard/" . $listInfo['detail_url']);
|
||||||
//작성시간
|
//작성시간
|
||||||
@ -35,7 +35,7 @@ class YamoonCrawler extends MyCrawler
|
|||||||
$tag = getenv("yamoon.view.content.tag");
|
$tag = getenv("yamoon.view.content.tag");
|
||||||
return $this->getMediaUrls($response, $tag, $listInfo);
|
return $this->getMediaUrls($response, $tag, $listInfo);
|
||||||
}
|
}
|
||||||
private function listPage(): array
|
protected function list_page(): array
|
||||||
{
|
{
|
||||||
if ($this->getDebug()) {
|
if ($this->getDebug()) {
|
||||||
$listInfos = [
|
$listInfos = [
|
||||||
@ -76,35 +76,17 @@ class YamoonCrawler extends MyCrawler
|
|||||||
log_message("notice", __FUNCTION__ . " 작업 완료");
|
log_message("notice", __FUNCTION__ . " 작업 완료");
|
||||||
return $listInfos;
|
return $listInfos;
|
||||||
}
|
}
|
||||||
public function execute(): void
|
//File DB 및 Board DB 등록작업등
|
||||||
|
protected function backend_process(int $i, array $listInfo, array $storages)
|
||||||
{
|
{
|
||||||
$listInfos = $this->listPage();
|
$board_entity = $this->getMyStorage()->getBoard()->createByCrawler($i, $listInfo, $storages);
|
||||||
//Limit가 0이면 $listInfos 갯수만큼 다하고, LIMIT 갯수 혹은 item의 갯수중 작은수만큼 한다.
|
$this->getMyStorage()->getFile()->createByCrawler($board_entity, $storages);
|
||||||
$max_limit = intval(getenv("yamap.list.max_limit"));
|
$this->getMyStorage()->getImage()->createByCrawler($board_entity, $storages);
|
||||||
if ($max_limit) {
|
}
|
||||||
$max_limit = count($listInfos) <= $max_limit ? count($listInfos) : $max_limit;
|
public function execute(int $max_limit): void
|
||||||
} else {
|
{
|
||||||
$max_limit = count($listInfos);
|
$listInfos = $this->list_page();
|
||||||
}
|
$this->main_process($max_limit, $listInfos);
|
||||||
$total = count($listInfos);
|
log_message("notice", __FUNCTION__ . " 작업이 완료되었습니다.");
|
||||||
$i = 1;
|
|
||||||
foreach ($listInfos as $listInfo) {
|
|
||||||
if ($i <= $max_limit) {
|
|
||||||
try {
|
|
||||||
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업시작");
|
|
||||||
list($listInfo, $urls) = $this->detailPage($listInfo);
|
|
||||||
$this->mediaProcess($urls);
|
|
||||||
//File DB 및 Board DB 등록작업
|
|
||||||
$board_entity = $this->getMyStorage()->getBoard()->createByCrawler($i, $listInfo, $this->_storages);
|
|
||||||
$this->getMyStorage()->getFile()->createByCrawler($board_entity, $this->_storages);
|
|
||||||
$this->getMyStorage()->getImage()->createByCrawler($board_entity, $this->_storages);
|
|
||||||
log_message("notice", "게시물 {$i}번째/{$total}개중 {$listInfo["nickname"]} 작업완료.");
|
|
||||||
$i++;
|
|
||||||
} catch (\Exception $e) {
|
|
||||||
log_message("debug", $e->getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log_message("notice", "Crawler->" . __FUNCTION__ . " 작업이 완료되었습니다.");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user