Automation/app/Libraries/CrawlerLibrary.php
2024-08-31 22:24:28 +09:00

63 lines
1.5 KiB
PHP

<?php
namespace App\Libraries;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
/**
 * Small helper around Guzzle and Symfony DomCrawler for fetching pages from
 * a single host and pulling fragments, link targets and image sources out of
 * the returned HTML.
 */
class CrawlerLibrary
{
    /** @var Client|null Lazily created HTTP client; built on first use by getClient(). */
    private $_client = null;

    /** @var string Prefix that is prepended verbatim to every URL given to getContent(). */
    private $_host = "";

    /** @var array Options handed to the Guzzle client constructor. */
    private $_options = [];

    /**
     * @param string $host    Prefix prepended verbatim to every requested URL
     *                        (no slash normalisation is performed).
     * @param array  $options Optional Guzzle client options (e.g. 'verify',
     *                        'timeout', 'headers'). Keys given here override
     *                        the built-in defaults.
     */
    public function __construct(string $host, array $options = [])
    {
        $this->_host = $host;

        // NOTE(security): TLS certificate verification stays disabled by default
        // because that is what this class has always done. Pass
        // ['verify' => true] when the identity of the remote host matters.
        // Array union keeps the caller's value for any key present on the left.
        $this->_options = $options + ['verify' => false];
    }

    /**
     * Returns the host prefix supplied to the constructor.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Returns the shared HTTP client, creating it on first use.
     */
    private function getClient(): Client
    {
        if ($this->_client === null) {
            $this->_client = new Client($this->_options);
        }

        return $this->_client;
    }

    /**
     * Performs a GET request against host + $url and returns the response body.
     *
     * The host and $url are concatenated as-is, so the caller is responsible
     * for the separating slash. Exceptions thrown by the HTTP client are not
     * caught here and propagate to the caller.
     *
     * @param string $url Path (or any suffix) appended to the host prefix.
     *
     * @return string The complete response body.
     */
    final public function getContent(string $url): string
    {
        $response = $this->getClient()->request('GET', $this->getHost() . $url);

        // Casting the stream reads the whole body from the beginning, whereas
        // getContents() only returns what is left after the current cursor.
        return (string) $response->getBody();
    }

    /**
     * Fetches host + $url and returns the inner HTML of the first node
     * matching $tag, or of the parsed document root when $tag is falsy.
     *
     * DomCrawler throws when $tag matches nothing; that exception is left
     * to propagate.
     *
     * @param string       $url Path appended to the host prefix.
     * @param string|false $tag CSS selector, or a falsy value for the whole document.
     *
     * @return string
     */
    final public function getInnerHTML(string $url, $tag = false): string
    {
        $crawler = new Crawler($this->getContent($url));

        // Truthiness check kept on purpose: '', null and false all mean "no selector".
        if ($tag) {
            return $crawler->filter($tag)->html();
        }

        return $crawler->html();
    }

    /**
     * Collects the "href" attribute of every node in $html matching $tag.
     *
     * @param string $html Markup to parse.
     * @param string $tag  CSS selector of the nodes to inspect.
     *
     * @return array One entry per matched node, in document order; the entry
     *               is null when the node has no "href" attribute.
     */
    final public function getLinks(string $html, string $tag = "a"): array
    {
        return $this->collectAttribute($html, $tag, "href");
    }

    /**
     * Collects the "src" attribute of every node in $html matching $tag.
     *
     * @param string $html Markup to parse.
     * @param string $tag  CSS selector of the nodes to inspect.
     *
     * @return array One entry per matched node, in document order; the entry
     *               is null when the node has no "src" attribute.
     */
    final public function getImages(string $html, $tag = "img"): array
    {
        return $this->collectAttribute($html, $tag, "src");
    }

    /**
     * Parses $html and returns the given attribute of every node matching $selector.
     *
     * @param string $html      Markup to parse.
     * @param string $selector  CSS selector of the nodes to inspect.
     * @param string $attribute Name of the attribute to read from each node.
     *
     * @return array
     */
    private function collectAttribute(string $html, $selector, string $attribute): array
    {
        $crawler = new Crawler($html);

        return $crawler->filter($selector)->each(
            // static: the callback does not need $this bound.
            static function (Crawler $node) use ($attribute) {
                return $node->attr($attribute);
            }
        );
    }
}