Automation/app/Libraries/MyCrawler/MyCrawlerLibrary.php
2024-09-07 19:00:11 +09:00

101 lines
3.5 KiB
PHP

<?php
namespace App\Libraries\MyCrawler;
use Symfony\Component\DomCrawler\Crawler;
use App\Libraries\CommonLibrary;
/**
 * Base class for crawlers: fetches a page, selects nodes, downloads the
 * resources those nodes point at, stores them, and builds media tags for
 * the stored files.
 *
 * Concrete subclasses supply the socket (network access) and the storage
 * (persistence) collaborators and implement execute().
 */
abstract class MyCrawlerLibrary extends CommonLibrary
{
    /**
     * Delegates to the parent constructor. Being protected, it can only be
     * invoked from within this class hierarchy.
     */
    protected function __construct()
    {
        parent::__construct();
    }

    /**
     * Returns the socket collaborator.
     *
     * This class calls getContent($url), download($url) and
     * isContainsHttpOrHttps($text) on the returned object.
     */
    abstract public function getMySocket();

    /**
     * Returns the storage collaborator.
     *
     * This class calls setFileName(), save(), getPath(), getFieType(),
     * getFieName() and getUploadPath() on the returned object.
     */
    abstract public function getMyStorage();

    /**
     * Runs the crawl implemented by the concrete subclass.
     */
    abstract public function execute(): array;

    /**
     * Fetches $url through the socket and returns the part of the document
     * matching the CSS selector $tag.
     *
     * @param string $url URL handed to the socket's getContent().
     * @param string $tag CSS selector applied to the fetched document.
     *
     * @return Crawler Crawler narrowed to the nodes matching $tag.
     *
     * @throws \Exception When the socket returns a falsy response.
     */
    final protected function getContent(string $url, string $tag): Crawler
    {
        $response = $this->getMySocket()->getContent($url);
        if (!$response) {
            // Message text is runtime output ("실패" = "failed"); kept as-is.
            throw new \Exception("getCrawler 실패:{$url}");
        }

        return (new Crawler($response))->filter($tag);
    }

    /**
     * Collects every node under $crawler that matches $options["tag"].
     *
     * @param Crawler $crawler Scope to search in.
     * @param array   $options Must contain "tag": the CSS selector to match.
     * @param array   $nodes   Nodes to append to (defaults to an empty list).
     *
     * @return array $nodes followed by one Crawler per matching node.
     */
    final protected function getNodes(Crawler $crawler, array $options, $nodes = []): array
    {
        $tag = $options["tag"];

        // Only $nodes is mutated, so only $nodes is captured by reference.
        $crawler->filter($tag)->each(
            static function (Crawler $node) use ($tag, &$nodes): void {
                log_message("debug", sprintf("getNode-> %s", $tag));
                $nodes[] = $node;
            }
        );

        return $nodes;
    }

    /**
     * Downloads the resource referenced by each node matching $options["tag"].
     *
     * Nodes that do not carry the $options["attr"] attribute are skipped:
     * Crawler::attr() yields null for them and there is nothing to download.
     *
     * @param Crawler $crawler Scope to search in.
     * @param array   $options Must contain "tag" (CSS selector) and "attr"
     *                         (attribute holding the download target).
     *
     * @return array One entry per downloaded node, exactly as returned by
     *               the socket's download().
     */
    final protected function download(Crawler $crawler, array $options): array
    {
        $attribute = $options["attr"];
        $downloadInfos = [];

        foreach ($this->getNodes($crawler, $options) as $node) {
            $target = $node->attr($attribute);
            if ($target === null || $target === '') {
                log_message("debug", sprintf("download-> skipped node without %s", $attribute));
                continue;
            }
            $downloadInfos[] = $this->getMySocket()->download($target);
        }

        return $downloadInfos;
    }

    /**
     * Persists each downloaded item through the storage and reports the
     * ones that were saved. Items whose save() returns a falsy value are
     * left out of the result.
     *
     * @param array $downloadInfos Items with 'fileName', 'content' and 'url' keys.
     * @param array $fileInfos     Entries to append to (defaults to an empty list).
     *
     * @return array $fileInfos followed by one
     *               array{url: mixed, path: mixed, fileType: mixed, fileName: mixed}
     *               per saved item.
     */
    final protected function save(array $downloadInfos, $fileInfos = []): array
    {
        foreach ($downloadInfos as $downloadInfo) {
            // The storage is stateful (file name set, then saved, then
            // queried), so one instance is used for the whole item.
            $storage = $this->getMyStorage();

            $storage->setFileName($downloadInfo['fileName']);
            if (!$storage->save($downloadInfo['content'])) {
                continue;
            }

            // NOTE(review): getFieType()/getFieName() are spelled "Fie" here;
            // confirm against the storage class before renaming.
            $fileInfos[] = [
                "url" => $downloadInfo['url'],
                "path" => $storage->getPath(),
                "fileType" => $storage->getFieType(),
                "fileName" => $storage->getFieName(),
            ];
        }

        return $fileInfos;
    }

    /**
     * Appends the media tag for a single stored file to $mediaTags.
     *
     * For "jpeg" and "mp4" files a tag pointing at the locally stored copy
     * is built, unless the original value contains http/https (as judged by
     * the socket), in which case the original value is used unchanged. Any
     * other file type always uses the original value.
     *
     * The original value is read from the 'orignal' key (historical
     * spelling) and, failing that, from 'original'. When neither is present
     * (the entries produced by save() carry neither) it is treated as
     * absent instead of triggering an undefined-key error.
     *
     * @param array $fileInfos One file entry: 'fileType', 'path', 'fileName'
     *                         and optionally 'orignal' / 'original'.
     * @param array $mediaTags Tags to append to (defaults to an empty list).
     *
     * @return array $mediaTags with exactly one entry appended.
     */
    final protected function getMediaTags(array $fileInfos, array $mediaTags = []): array
    {
        $original = $fileInfos['orignal'] ?? $fileInfos['original'] ?? null;

        // NOTE(review): the <video> template has no closing tag and uses an
        // "alt" attribute; left untouched because consumers of this markup
        // are not visible from here.
        switch ($fileInfos['fileType']) {
            case "jpeg":
                $template = "<img src=\"/%s/%s/%s\" alt=\"%s\">";
                break;
            case "mp4":
                $template = "<video src=\"/%s/%s/%s\" alt=\"%s\">";
                break;
            default:
                $template = null;
                break;
        }

        $keepOriginal = $template === null
            || ($original !== null && $this->getMySocket()->isContainsHttpOrHttps($original));

        $mediaTags[] = $keepOriginal
            ? ($original ?? '')
            : $this->buildLocalMediaTag($template, $fileInfos);

        return $mediaTags;
    }

    /**
     * Fills a media tag template with the location of a locally stored file.
     *
     * Path and file name originate from crawled content, so every value is
     * HTML-escaped before being placed inside an attribute.
     *
     * @param string $template sprintf() format with four %s placeholders:
     *                         upload path, path, file name, alt text.
     * @param array  $fileInfos File entry providing 'path' and 'fileName'.
     */
    private function buildLocalMediaTag(string $template, array $fileInfos): string
    {
        $escape = static function ($value): string {
            return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        };

        return sprintf(
            $template,
            $escape($this->getMyStorage()->getUploadPath()),
            $escape($fileInfos["path"]),
            $escape($fileInfos["fileName"]),
            $escape($fileInfos["fileName"])
        );
    }
}