59 lines
1.3 KiB
PHP
59 lines
1.3 KiB
PHP
<?php
|
|
|
|
namespace App\Libraries;
|
|
|
|
use GuzzleHttp\Client;
|
|
use Symfony\Component\DomCrawler\Crawler;
|
|
|
|
/**
 * Small helper that downloads a page relative to a fixed host prefix and
 * exposes parts of the returned markup through Symfony's DomCrawler.
 *
 * Every public lookup performs a fresh HTTP GET; nothing is cached except the
 * underlying Guzzle client instance.
 */
class CrawlerLibrary
{
    /**
     * Lazily created HTTP client; remains null until the first request.
     *
     * @var Client|null
     */
    private $_client = null;

    /**
     * Prefix that is prepended verbatim to every URL handed to this class.
     *
     * @var string
     */
    private $_host = "";

    /**
     * @param string $host Prefix used for all requests. It is concatenated
     *                     with the requested URL as-is, so the caller is
     *                     responsible for a consistent slash between the two.
     */
    public function __construct(string $host)
    {
        $this->_host = $host;
    }

    /**
     * Returns the host prefix given at construction time.
     */
    final public function getHost(): string
    {
        return $this->_host;
    }

    /**
     * Returns the shared Guzzle client, creating it on first use.
     */
    private function getClient(): Client
    {
        if ($this->_client === null) {
            // NOTE(review): 'verify' => false switches off TLS certificate
            // verification, so HTTPS responses are not authenticated. Kept
            // unchanged because callers may rely on it for self-signed hosts;
            // confirm whether this is still required.
            $this->_client = new Client(['verify' => false]);
        }

        return $this->_client;
    }

    /**
     * Fetches host + $url with a GET request and wraps the body in a Crawler.
     *
     * No exception is caught here, so any failure raised by the HTTP client
     * propagates to the caller.
     *
     * @param string $url Path (or suffix) appended verbatim to the host prefix.
     */
    private function getCrawler(string $url): Crawler
    {
        $response = $this->getClient()->request('GET', $this->getHost() . $url);

        // Casting the stream to string reads the body from its beginning,
        // whereas getContents() only returns what is left after the current
        // stream position.
        return new Crawler((string) $response->getBody());
    }

    /**
     * Downloads the page at $url and returns the nodes matching $tag.
     *
     * @param string $url Path appended verbatim to the host prefix.
     * @param string $tag Selector passed straight to Crawler::filter().
     */
    final public function getNode(string $url, string $tag): Crawler
    {
        return $this->getCrawler($url)->filter($tag);
    }

    /**
     * Collects the "href" attribute of every node matching $tag on the page.
     *
     * @param string $url Path appended verbatim to the host prefix.
     * @param string $tag Selector of the nodes to inspect; defaults to "a".
     *
     * @return array One entry per matched node, in the order produced by
     *               Crawler::each(). NOTE(review): entries are presumably null
     *               for nodes without an href (see DomCrawler attr()) - confirm.
     */
    final public function getLinks(string $url, string $tag = "a"): array
    {
        return $this->getNode($url, $tag)->each(
            static function (Crawler $node) {
                return $node->attr("href");
            }
        );
    }

    /**
     * Collects the "src" attribute of every node matching $tag on the page.
     *
     * @param string $url Path appended verbatim to the host prefix.
     * @param string $tag Selector of the nodes to inspect; defaults to "img".
     *
     * @return array One entry per matched node, in the order produced by
     *               Crawler::each(). NOTE(review): entries are presumably null
     *               for nodes without a src (see DomCrawler attr()) - confirm.
     */
    final public function getImages(string $url, string $tag = "img"): array
    {
        return $this->getNode($url, $tag)->each(
            static function (Crawler $node) {
                return $node->attr("src");
            }
        );
    }
}
|