Automation init...1

This commit is contained in:
최준흠 2024-09-05 00:09:04 +09:00
parent 1f68f674a5
commit 1c1e64ed84
3 changed files with 66 additions and 38 deletions

View File

@ -11,28 +11,13 @@ class Crawler extends BaseController
{ {
try { try {
$isDebug = in_array("debug", $params); $isDebug = in_array("debug", $params);
//1. Yamap 사이트의 자유게시판 게시물 중 작성자가 관리자가 아닌 게시물을 검색한 후
// 목록의 첫 번째 게시물 내용에 있는 이미지, 비디오 정보를 가져오는 기능
$library = new YamapLibrary(getenv("yamap.host")); $library = new YamapLibrary(getenv("yamap.host"));
$library->setDebug($isDebug); $library->setDebug($isDebug);
//1. MainPage $library->execute();
$url = getenv("yamap.list.url"); //2. 워드프레스에 로그인 처리 기능
$crawler = $library->getCrawler($url, getenv("yamap.list.tag")); //3. 워드프레스의 자유게시판에 게시물 등록 기능
$urls = $library->getListURLs(
$crawler,
getenv("yamap.list.item.tag"),
getenv("yamap.list.item.subject.tag"),
getenv("yamap.list.item.nickname.tag"),
getenv("yamap.list.item.nickname.skip")
);
if (!count($urls)) {
throw new \Exception("Target URL이 없습니다.");
}
//2. TargetPage : div.contents 가진 객체를 찾아서 첫번째 요소에서만 참조
$url = $isDebug ? getenv("yamap.view.test.url") : $urls[0];
$crawler = $library->getCrawler($url, getenv("yamap.view.content.tag"));
//3. Image
$library->download($crawler, ["tag" => "img", "attr" => "src"]);
//4. Video
$library->download($crawler, ["tag" => "video", "attr" => "src"]);
log_message("info", "완료되었습니다."); log_message("info", "완료되었습니다.");
return true; return true;
} catch (\Exception $e) { } catch (\Exception $e) {

View File

@ -26,7 +26,7 @@ class YamapLibrary
$this->_debug = $debug; $this->_debug = $debug;
} }
public function getCrawler(string $url, string $tag): Crawler protected function getCrawler(string $url, string $tag): Crawler
{ {
$response = $this->getContentByMyWeb($url); $response = $this->getContentByMyWeb($url);
if (!$response) { if (!$response) {
@ -35,40 +35,65 @@ class YamapLibrary
return $this->createByMyCrawler($response)->filter($tag); return $this->createByMyCrawler($response)->filter($tag);
} }
public function getListURLs( protected function getList(
Crawler $crawler, Crawler $crawler,
string $item_tag, string $item_tag,
string $item_subject_tag, string $item_subject_tag,
string $item_nickname_tag, string $item_nickname_tag,
string $item_nickname_skip, string $item_nickname_skip,
array $urls = [] array $results = []
): array { ): array {
//div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김 //div.bbs_item를 가진 객체를 찾아서 같은 형식의 객체(sibling)를 배열로 넘김
$crawler->filter($item_tag)->each( $crawler->filter($item_tag)->each(
function (Crawler $node) use ( function (Crawler $node) use (
&$urls,
$item_subject_tag, $item_subject_tag,
&$item_nickname_tag, &$item_nickname_tag,
&$item_nickname_skip &$item_nickname_skip,
&$results
): void { ): void {
//bbs_item에서 span.g_nickname 객체를 찾아서 작성자 "관리자" 아닌지 확인 후 Return Bool //bbs_item에서 span.g_nickname 객체를 찾아서 작성자 "관리자" 아닌지 확인 후 Return Bool
$nickname = $node->filter($item_nickname_tag)->text(); $nickname = $node->filter($item_nickname_tag)->text();
log_message("debug", $item_nickname_tag . ":" . $nickname); log_message("debug", $item_nickname_tag . ":" . $nickname);
if ($nickname != $item_nickname_skip) { if ($nickname != $item_nickname_skip) {
$options = ["tag" => $item_subject_tag, "attr" => "href"]; //작성자가 "관리자"가 아니 게시물이면 해당 bbs_item에서 a.list_subject 객체를 찾아서
$urls = $this->getTagDatasByMyCrawler($node, $options); $url = $node->filter($item_subject_tag)->attr("href");
$results[] = ['nickname' => $nickname, 'url' => $url];
} }
} }
); );
return $urls; return $results;
} }
public function download(Crawler $crawler, array $options): void protected function download(Crawler $crawler, array $options): void
{ {
log_message("debug", "download:{$options["tag"]},{$options["attr"]}"); log_message("debug", "download:{$options["tag"]},{$options["attr"]}");
$urls = $this->getTagDatasByMyCrawler($crawler, $options); $nodes = $this->getNodesByMyCrawler($crawler, $options);
foreach ($urls as $url) { foreach ($nodes as $node) {
$this->downloadByMyWeb($url, $this->getPathByMyStorage(), $this->getDebug()); $this->downloadByMyWeb($node->attr($options["attr"]), $this->getPathByMyStorage(), $this->getDebug());
} }
} }
/**
 * Runs the crawl end to end: reads the list page, picks a target page,
 * then downloads the images and videos found on that page.
 *
 * All URLs and selectors are read from the environment via getenv().
 *
 * NOTE(review): per the getNodesByMyCrawler() code shown in this diff, a node
 * is appended once for every option key that is neither 'find' nor 'except'.
 * With the two-key options passed below ("tag" and "attr") each matched node
 * would therefore be collected - and downloaded - twice. Confirm and fix there.
 *
 * @throws \Exception when getList() returns no entries.
 */
public function execute(): void
{
    // Step 1 - list page: load it and collect the entries getList() keeps.
    $listPage = $this->getCrawler(getenv("yamap.list.url"), getenv("yamap.list.tag"));
    $posts = $this->getList(
        $listPage,
        getenv("yamap.list.item.tag"),
        getenv("yamap.list.item.subject.tag"),
        getenv("yamap.list.item.nickname.tag"),
        getenv("yamap.list.item.nickname.skip")
    );
    if (count($posts) === 0) {
        throw new \Exception("Target URL이 없습니다.");
    }

    // Step 2 - target page: debug mode uses the configured test URL,
    // otherwise only the first collected entry is followed.
    if ($this->getDebug()) {
        $targetUrl = getenv("yamap.view.test.url");
    } else {
        $targetUrl = $posts[0]["url"];
    }
    $contentPage = $this->getCrawler($targetUrl, getenv("yamap.view.content.tag"));

    // Steps 3 and 4 - download images first, then videos, by their "src" attribute.
    foreach (["img", "video"] as $mediaTag) {
        $this->download($contentPage, ["tag" => $mediaTag, "attr" => "src"]);
    }
}
} }

View File

@ -10,14 +10,32 @@ trait MyCrawlerTrait
{ {
return new Crawler($html); return new Crawler($html);
} }
public function getTagDatasByMyCrawler(Crawler $crawler, array $options = ["tag" => "a", "attr" => "href"], array $tagdatas = []): array public function getNodesByMyCrawler(Crawler $crawler, array $options, $nodes = []): array
{ {
$crawler->filter($options["tag"])->each( $crawler->filter($options["tag"])->each(
function (Crawler $node) use (&$tagdatas, &$options): void { function (Crawler $node) use (&$options, &$nodes): void {
log_message("debug", sprintf("getTagDatas-> %s:%s", $options["tag"], $node->attr($options["attr"]))); foreach (array_keys($options) as $key) {
$tagdatas[] = $node->attr($options["attr"]); switch ($key) {
case 'find':
if ($node->text() == $options[$key]) {
log_message("debug", sprintf("getNodeByMyCrawler-> %s:%s", $options["tag"], $options[$key]));
$nodes[] = $node;
}
break;
case 'except':
if ($node->text() != $options[$key]) {
log_message("debug", sprintf("getNodeByMyCrawler-> %s:%s", $options["tag"], $options[$key]));
$nodes[] = $node;
}
break;
default:
log_message("debug", sprintf("getNodeByMyCrawler-> %s", $options["tag"]));
$nodes[] = $node;
break;
}
}
} }
); );
return $tagdatas; return $nodes;
} }
} }