Automation init...1

This commit is contained in:
최준흠 2024-09-05 00:09:04 +09:00
parent 1f68f674a5
commit 1c1e64ed84
3 changed files with 66 additions and 38 deletions

View File

@ -11,28 +11,13 @@ class Crawler extends BaseController
{
try {
$isDebug = in_array("debug", $params);
//1. Search the Yamap site's free board for posts whose author is not the administrator, then
// fetch the image and video information found in the content of the first post in that list
$library = new YamapLibrary(getenv("yamap.host"));
$library->setDebug($isDebug);
//1. MainPage
$url = getenv("yamap.list.url");
$crawler = $library->getCrawler($url, getenv("yamap.list.tag"));
$urls = $library->getListURLs(
$crawler,
getenv("yamap.list.item.tag"),
getenv("yamap.list.item.subject.tag"),
getenv("yamap.list.item.nickname.tag"),
getenv("yamap.list.item.nickname.skip")
);
if (!count($urls)) {
throw new \Exception("Target URL이 없습니다.");
}
//2. TargetPage: find the element matching div.contents and use only its first match
$url = $isDebug ? getenv("yamap.view.test.url") : $urls[0];
$crawler = $library->getCrawler($url, getenv("yamap.view.content.tag"));
//3. Image
$library->download($crawler, ["tag" => "img", "attr" => "src"]);
//4. Video
$library->download($crawler, ["tag" => "video", "attr" => "src"]);
$library->execute();
//2. 워드프레스에 로그인 처리 기능
//3. 워드프레스의 자유게시판에 게시물 등록 기능
log_message("info", "완료되었습니다.");
return true;
} catch (\Exception $e) {

View File

@ -26,7 +26,7 @@ class YamapLibrary
$this->_debug = $debug;
}
public function getCrawler(string $url, string $tag): Crawler
protected function getCrawler(string $url, string $tag): Crawler
{
$response = $this->getContentByMyWeb($url);
if (!$response) {
@ -35,40 +35,65 @@ class YamapLibrary
return $this->createByMyCrawler($response)->filter($tag);
}
/**
 * Collects the list items whose author is not the skipped nickname.
 *
 * Every element matching $item_tag under $crawler is inspected. An item is
 * appended to the result when its nickname element exists, its nickname text
 * differs from $item_nickname_skip, and its subject element carries a
 * non-empty href.
 *
 * @param Crawler $crawler            Crawler to search for list items.
 * @param string  $item_tag           Selector passed to filter() for one list item (e.g. div.bbs_item).
 * @param string  $item_subject_tag   Selector of the subject link inside an item (e.g. a.list_subject).
 * @param string  $item_nickname_tag  Selector of the author element inside an item (e.g. span.g_nickname).
 * @param string  $item_nickname_skip Nickname whose posts are ignored.
 * @param array   $results            Entries to append to; defaults to an empty list.
 *
 * @return array $results followed by one ['nickname' => string, 'url' => string] entry per accepted item.
 */
protected function getList(
    Crawler $crawler,
    string $item_tag,
    string $item_subject_tag,
    string $item_nickname_tag,
    string $item_nickname_skip,
    array $results = []
): array {
    $crawler->filter($item_tag)->each(
        function (Crawler $node) use (
            $item_subject_tag,
            $item_nickname_tag,
            $item_nickname_skip,
            &$results
        ): void {
            // Check for the author element first: calling text() on an empty
            // selection would throw and abort the whole listing because of a
            // single malformed item.
            $author = $node->filter($item_nickname_tag);
            if ($author->count() === 0) {
                log_message("debug", $item_nickname_tag . ": not found, item skipped");
                return;
            }
            $nickname = $author->text();
            log_message("debug", $item_nickname_tag . ":" . $nickname);
            if ($nickname === $item_nickname_skip) {
                return;
            }
            // Only keep items that really expose a link; the caller uses the
            // 'url' value as the next page to fetch.
            $subject = $node->filter($item_subject_tag);
            $url = $subject->count() > 0 ? $subject->attr("href") : null;
            if ($url === null || $url === "") {
                log_message("debug", $item_subject_tag . ": no href, item skipped");
                return;
            }
            $results[] = ['nickname' => $nickname, 'url' => $url];
        }
    );
    return $results;
}
/**
 * Downloads the resource referenced by every node matching $options["tag"].
 *
 * The URL is read from the attribute named by $options["attr"]. Nodes that do
 * not carry that attribute are skipped, and each distinct URL is handed to
 * downloadByMyWeb() only once per call.
 *
 * @param Crawler $crawler Crawler to search for matching nodes.
 * @param array   $options Must contain "tag" (selector) and "attr" (attribute holding the URL).
 */
protected function download(Crawler $crawler, array $options): void
{
    log_message("debug", "download:{$options["tag"]},{$options["attr"]}");
    $nodes = $this->getNodesByMyCrawler($crawler, $options);
    $seen = [];
    foreach ($nodes as $node) {
        $url = $node->attr($options["attr"]);
        // attr() yields null when the attribute is missing (for example a
        // <video> that uses <source> children); that is not a downloadable URL.
        if ($url === null || $url === "") {
            log_message("debug", "download: skipped a {$options["tag"]} node without {$options["attr"]}");
            continue;
        }
        // The same URL can appear on several nodes; fetch it a single time.
        if (isset($seen[$url])) {
            continue;
        }
        $seen[$url] = true;
        $this->downloadByMyWeb($url, $this->getPathByMyStorage(), $this->getDebug());
    }
}
/**
 * Runs one crawl: reads the list page, picks a target post and downloads
 * the images and videos found in its content.
 *
 * All URLs and selectors come from "yamap.*" environment values read with getenv().
 *
 * @throws \Exception When the list page yields no entry.
 */
public function execute(): void
{
    // 1. Main page: fetch the list container and collect its entries.
    $listCrawler = $this->getCrawler(getenv("yamap.list.url"), getenv("yamap.list.tag"));
    $posts = $this->getList(
        $listCrawler,
        getenv("yamap.list.item.tag"),
        getenv("yamap.list.item.subject.tag"),
        getenv("yamap.list.item.nickname.tag"),
        getenv("yamap.list.item.nickname.skip")
    );
    if (count($posts) === 0) {
        throw new \Exception("Target URL이 없습니다.");
    }

    // 2. Target page: in debug mode the configured test URL replaces the
    //    first listed post; only the content element is kept.
    if ($this->getDebug()) {
        $targetUrl = getenv("yamap.view.test.url");
    } else {
        $targetUrl = $posts[0]["url"];
    }
    $contentCrawler = $this->getCrawler($targetUrl, getenv("yamap.view.content.tag"));

    // 3. Images, then 4. videos: both are located through their src attribute.
    foreach (["img", "video"] as $mediaTag) {
        $this->download($contentCrawler, ["tag" => $mediaTag, "attr" => "src"]);
    }
}
}

View File

@ -10,14 +10,32 @@ trait MyCrawlerTrait
{
return new Crawler($html);
}
/**
 * Collects the nodes under $crawler that match $options["tag"], each one at most once.
 *
 * Recognised option keys:
 *  - "tag"    (required) selector handed to filter().
 *  - "find"   (optional) keep only nodes whose text equals this value.
 *  - "except" (optional) keep only nodes whose text differs from this value.
 * Any other key (for example "attr") does not influence the selection.
 *
 * @param Crawler $crawler Crawler to search.
 * @param array   $options Selection options as described above.
 * @param array   $nodes   Nodes to append to; defaults to an empty list.
 *
 * @return array $nodes followed by the matching nodes in document order.
 *
 * @throws \InvalidArgumentException When $options has no "tag" entry.
 */
public function getNodesByMyCrawler(Crawler $crawler, array $options, $nodes = []): array
{
    if (!isset($options["tag"])) {
        throw new \InvalidArgumentException('getNodesByMyCrawler requires a "tag" option.');
    }
    $crawler->filter($options["tag"])->each(
        function (Crawler $node) use ($options, &$nodes): void {
            // The filters are evaluated once per node. Looping over every
            // option key and appending per key would return a node once for
            // each non-filter key ("tag", "attr", ...) and would let nodes
            // through even when "find"/"except" rejected them.
            if (array_key_exists("find", $options) && $node->text() !== (string) $options["find"]) {
                return;
            }
            if (array_key_exists("except", $options) && $node->text() === (string) $options["except"]) {
                return;
            }
            log_message("debug", sprintf("getNodeByMyCrawler-> %s", $options["tag"]));
            $nodes[] = $node;
        }
    );
    return $nodes;
}
}