Automation init...
This commit is contained in:
parent
d3b9be4ab6
commit
f3f8c0c6cd
@ -2,24 +2,32 @@
|
||||
|
||||
namespace App\Controllers\CLI;
|
||||
|
||||
use App\Libraries\CrawlerLibrary;
|
||||
use App\Controllers\BaseController;
|
||||
use App\Libraries\MyCrawler\YamapLibrary;
|
||||
|
||||
class Crawler extends BaseController
|
||||
{
|
||||
public function yamap()
|
||||
{
|
||||
try {
|
||||
$library = new CrawlerLibrary("https://www.yamap16.com");
|
||||
echo "Host-> " . $library->getHost() . "\n";
|
||||
// $html = $library->getInnerHTML("/Board/List.aspx?id=free&ca=1");
|
||||
// $links = $library->getLinks($html, "a.list_subject");
|
||||
$url = "/Board/View.aspx?id=free&ca=1&rno=192681&page=1";
|
||||
$html = $library->getInnerHTML($url, "div.contents p");
|
||||
$images = $library->getImages($html);
|
||||
$library = new YamapLibrary("https://www.yamap16.com");
|
||||
$mainPage = $library->getContent("/Board/List.aspx?id=free");
|
||||
$links = $library->getLinks($mainPage);
|
||||
|
||||
//Image형식이나 , Viedeo형식의 Content를 가지고 있으면
|
||||
log_message("debug", "viewLink-> " . $links[0]["href"]);
|
||||
$viewPage = $library->getContent($links[0]["href"]);
|
||||
// $viewPage = $library->getContent("/Board/View.aspx?id=free&ca=&rno=193046&page=1"); //Image
|
||||
// $viewPage = $library->getContent("/Board/View.aspx?id=free&ca=&rno=193055&page=1"); //Video
|
||||
$images = $library->getImages($viewPage);
|
||||
foreach ($images as $image) {
|
||||
echo "Image-> " . $image . "\n";
|
||||
$library->download($image);
|
||||
log_message("debug", "Image-> " . $image['src']);
|
||||
$library->download($image['src']);
|
||||
}
|
||||
$videos = $library->getVideos($viewPage);
|
||||
foreach ($videos as $video) {
|
||||
log_message("debug", "Video-> " . $video['src']);
|
||||
$library->download($video['src']);
|
||||
}
|
||||
log_message("info", "완료되었습니다.");
|
||||
return true;
|
||||
|
||||
@ -1,45 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries;
|
||||
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Generic crawler helper: downloads pages relative to the configured host and
 * pulls markup, link targets and image sources out of them with Symfony's
 * DomCrawler.
 */
class CrawlerLibrary extends WebBaseLibrary
{
    /**
     * @param string $host Forwarded unchanged to the parent constructor.
     */
    public function __construct(string $host)
    {
        parent::__construct($host);
    }

    /**
     * Sends a GET request to host + $url and returns the response body contents.
     *
     * @param string $url Path (and query string) appended to the host.
     */
    final public function getContent(string $url): string
    {
        $client = $this->getClient();
        $target = $this->gethost() . $url;
        $body = $client->request('GET', $target)->getBody();

        return $body->getContents();
    }

    /**
     * Downloads $url and returns the HTML of the document, or of the node(s)
     * matched by the CSS selector $tag when one is given.
     *
     * @param string       $url Path appended to the host.
     * @param string|false $tag CSS selector; any falsy value means "whole document".
     */
    final public function getInnerHTML(string $url, $tag = false)
    {
        $document = new Crawler($this->getContent($url));

        if (!$tag) {
            return $document->html();
        }

        return $document->filter($tag)->html();
    }

    /**
     * Returns the "href" attribute of every node in $html matched by $tag.
     *
     * @param string $html Markup to search.
     * @param string $tag  CSS selector of the link elements.
     */
    final public function getLinks(string $html, string $tag = "a"): array
    {
        $readHref = function (Crawler $element) {
            return $element->attr("href");
        };
        $matches = (new Crawler($html))->filter($tag);

        return $matches->each($readHref);
    }

    /**
     * Returns the "src" attribute of every node in $html matched by $tag.
     *
     * @param string $html Markup to search.
     * @param string $tag  CSS selector of the image elements.
     */
    final public function getImages(string $html, $tag = "img"): array
    {
        $readSrc = function (Crawler $element) {
            return $element->attr("src");
        };
        $matches = (new Crawler($html))->filter($tag);

        return $matches->each($readSrc);
    }
}
|
||||
@ -5,7 +5,7 @@ namespace App\Libraries;
|
||||
use GuzzleHttp\Cookie\CookieJar;
|
||||
use GuzzleHttp\Client;
|
||||
|
||||
abstract class WebBaseLibrary
|
||||
abstract class MyBaseLibrary
|
||||
{
|
||||
private $_host = "";
|
||||
private $_client = null;
|
||||
@ -36,8 +36,13 @@ abstract class WebBaseLibrary
|
||||
return $this->_cookieJar;
|
||||
}
|
||||
|
||||
/**
 * Sends a GET request to host + $url and returns the response body as a string.
 *
 * The body object returned by the HTTP client is cast explicitly so the
 * declared string return type is honoured without relying on implicit
 * object-to-string coercion (which fails under strict_types).
 *
 * @param string $url Path (and query string) appended to the host.
 */
final public function getContent(string $url): string
{
    return (string) $this->getClient()->get($this->gethost() . $url)->getBody();
}
|
||||
|
||||
// 로그인 메서드
|
||||
public function login($url, $username, $password)
|
||||
final public function login($url, $username, $password)
|
||||
{
|
||||
try {
|
||||
$response = $this->getClient()->post($this->gethost() . $url, [
|
||||
@ -61,22 +66,23 @@ abstract class WebBaseLibrary
|
||||
}
|
||||
|
||||
// 파일 다운로드 메서드
|
||||
public function download($url, $addPath = false)
|
||||
final public function download($url, $path = false)
|
||||
{
|
||||
try {
|
||||
$fullPath = WRITEPATH . "uploads";
|
||||
$fullPath .= !$addPath ? '' : DIRECTORY_SEPARATOR . $addPath;
|
||||
if (!is_dir($fullPath)) {
|
||||
mkdir($fullPath);
|
||||
$fileNames = explode('/', $url);
|
||||
if (!is_array($fileNames) || !count($fileNames)) {
|
||||
throw new \Exception("Download URL Error:" . $url);
|
||||
}
|
||||
$temps = explode('/', $url);
|
||||
if (!is_array($temps) || !count($temps)) {
|
||||
throw new \Exception("URL error:" . var_dump($temps, true));
|
||||
$storagePath = WRITEPATH . "uploads";
|
||||
$storagePath .= !$path ? '' : DIRECTORY_SEPARATOR . $path;
|
||||
if (!is_dir($storagePath)) {
|
||||
if (!mkdir($storagePath)) {
|
||||
throw new \Exception("Make Directory Error:" . $storagePath);
|
||||
}
|
||||
}
|
||||
$file = $fullPath . DIRECTORY_SEPARATOR . array_pop($temps);
|
||||
$response = $this->getClient()->get($this->gethost() . $url, [
|
||||
'cookies' => $this->getCookieJar(),
|
||||
'sink' => $file,
|
||||
'sink' => $storagePath . DIRECTORY_SEPARATOR . array_pop($fileNames),
|
||||
]);
|
||||
if ($response->getStatusCode() == 200) {
|
||||
log_message("info", "파일이 성공적으로 다운로드되었습니다!");
|
||||
42
app/Libraries/MyCrawler/MyCrawlerLibrary.php
Normal file
42
app/Libraries/MyCrawler/MyCrawlerLibrary.php
Normal file
@ -0,0 +1,42 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler;
|
||||
|
||||
use App\Libraries\MyBaseLibrary;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Base class for site-specific crawlers. Wraps Symfony's DomCrawler with small
 * helpers that extract markup, links and images from an HTML string.
 */
abstract class MyCrawlerLibrary extends MyBaseLibrary
{
    /**
     * @param string $host Forwarded unchanged to the parent constructor.
     */
    protected function __construct(string $host)
    {
        parent::__construct($host);
    }

    /**
     * Wraps $html in a new DomCrawler instance.
     *
     * @param mixed $html Anything the Crawler constructor accepts; subclasses in
     *                    this project pass both HTML strings and DOM elements.
     */
    final public function getCrawler($html)
    {
        return new Crawler($html);
    }

    /**
     * Returns the HTML of the document, or of the node(s) matched by $tag.
     *
     * NOTE(review): DomCrawler's html() is documented to throw when the node
     * list is empty, so a selector with no match raises instead of returning
     * an empty string - confirm callers expect that.
     *
     * @param string       $html Markup to parse.
     * @param string|false $tag  CSS selector; any falsy value means "whole document".
     */
    final public function getInnerHTML(string $html, $tag = false)
    {
        // Parse once and reuse the crawler for either branch.
        $crawler = $this->getCrawler($html);

        return $tag ? $crawler->filter($tag)->html() : $crawler->html();
    }

    /**
     * Collects the links found in $html.
     *
     * @param string $html    Markup to search.
     * @param array  $options "tag" => CSS selector, "attr" => attribute holding the
     *                        target. Missing keys fall back to "a" / "href".
     *
     * @return array List of ["anchor" => node text, "href" => attribute value].
     */
    public function getLinks(string $html, array $options = ["tag" => "a", "attr" => "href"]): array
    {
        // Array union keeps caller-supplied keys and fills in any that are missing,
        // so a partial $options array no longer triggers an undefined-index error.
        $options += ["tag" => "a", "attr" => "href"];

        return $this->getCrawler($html)->filter($options["tag"])->each(
            function (Crawler $node) use ($options) {
                return ["anchor" => $node->text(), "href" => $node->attr($options["attr"])];
            }
        );
    }

    /**
     * Collects the images found in $html.
     *
     * @param string $html    Markup to search.
     * @param array  $options "tag" => CSS selector, "attr" => attribute holding the
     *                        source. Missing keys fall back to "img" / "src".
     *
     * @return array List of ["alt" => alt attribute ("" when absent), "src" => attribute value].
     */
    public function getImages(string $html, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        $options += ["tag" => "img", "attr" => "src"];

        return $this->getCrawler($html)->filter($options["tag"])->each(
            function (Crawler $node) use ($options) {
                // <img> has no text content, so the alternative text has to come from
                // the "alt" attribute; fall back to "" to keep the value a string.
                return ["alt" => $node->attr("alt") ?? "", "src" => $node->attr($options["attr"])];
            }
        );
    }
}
|
||||
55
app/Libraries/MyCrawler/YamapLibrary.php
Normal file
55
app/Libraries/MyCrawler/YamapLibrary.php
Normal file
@ -0,0 +1,55 @@
|
||||
<?php
|
||||
|
||||
namespace App\Libraries\MyCrawler;
|
||||
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
|
||||
/**
 * Crawler specialised for the Yamap board markup: reads post links from the
 * board listing and image / video sources from a post's "div.contents" area.
 */
class YamapLibrary extends MyCrawlerLibrary
{
    /**
     * @param string $host Forwarded unchanged to the parent constructor.
     */
    public function __construct(string $host)
    {
        parent::__construct($host);
    }

    /**
     * Collects post links from a board listing page.
     *
     * Takes the first "div.bbs_list div.bbs_item" element and walks its siblings
     * (the first item itself is not included). For every "span.g_nickname" in a
     * sibling whose text differs from $options["skip"], one entry is produced that
     * points at that sibling's "a.list_subject" link.
     *
     * @param string $html    Listing page markup.
     * @param array  $options "skip" => nickname whose posts are ignored; defaults to
     *                        "관리자" (presumably the site administrator account).
     *
     * @return array List of ["anchor" => nickname text, "href" => link target];
     *               empty when the listing contains no items.
     */
    public function getLinks($html, array $options = ["skip" => "관리자"]): array
    {
        // Fill in the default when the caller passes an array without "skip".
        $options += ["skip" => "관리자"];

        $items = $this->getCrawler($html)->filter("div.bbs_list div.bbs_item");
        if ($items->count() === 0) {
            // siblings() throws on an empty node list; "no items" means "no links",
            // matching what the parent implementation returns when nothing matches.
            return [];
        }

        $links = [];
        foreach ($items->first()->siblings() as $domElement) {
            $row = $this->getCrawler($domElement);

            $subject = $row->filter("a.list_subject");
            if ($subject->count() === 0) {
                // Row without a subject link: nothing to follow, and attr() would throw.
                continue;
            }
            $href = $subject->attr("href");

            $row->filter("span.g_nickname")->each(
                function (Crawler $node) use ($options, $href, &$links) {
                    $nickname = $node->text();
                    // Strict comparison so numeric-looking nicknames are not compared as numbers.
                    if ($nickname !== (string) $options["skip"]) {
                        $links[] = ["anchor" => $nickname, "href" => $href];
                    }
                }
            );
        }

        return $links;
    }

    /**
     * Collects images from the first "div.contents" element of a post page.
     *
     * @param string $html    Post page markup.
     * @param array  $options "tag" => CSS selector, "attr" => attribute holding the
     *                        source. Missing keys fall back to "img" / "src".
     *
     * @return array List of ["alt" => alt attribute, "src" => attribute value].
     */
    public function getImages(string $html, array $options = ["tag" => "img", "attr" => "src"]): array
    {
        return $this->extractMedia($html, $options + ["tag" => "img", "attr" => "src"]);
    }

    /**
     * Collects videos from the first "div.contents" element of a post page.
     *
     * NOTE(review): only the attribute on the matched element itself is read, so
     * a <video> that declares its source via nested <source> elements would yield
     * a null "src" - confirm against the site's markup.
     *
     * @param string $html    Post page markup.
     * @param array  $options "tag" => CSS selector, "attr" => attribute holding the
     *                        source. Missing keys fall back to "video" / "src".
     *
     * @return array List of ["alt" => alt attribute, "src" => attribute value].
     */
    public function getVideos(string $html, array $options = ["tag" => "video", "attr" => "src"]): array
    {
        return $this->extractMedia($html, $options + ["tag" => "video", "attr" => "src"]);
    }

    /**
     * Shared extraction for getImages() / getVideos(): looks only inside the first
     * "div.contents" element and maps every node matched by $options["tag"].
     *
     * @param string $html    Post page markup.
     * @param array  $options Must contain "tag" and "attr".
     */
    private function extractMedia(string $html, array $options): array
    {
        $contents = $this->getCrawler($html)->filter("div.contents")->first();

        return $contents->filter($options["tag"])->each(
            function (Crawler $node) use ($options) {
                return [
                    "alt" => $node->attr('alt'),
                    "src" => $node->attr($options["attr"])
                ];
            }
        );
    }
}
|
||||
Loading…
Reference in New Issue
Block a user