import axios from "axios";
import * as cheerio from "cheerio";

/** One scraped board post. */
export interface BoardStruct {
  /** Optional numeric identifier. Never set by the base class — presumably filled in by a subclass or by storage; confirm. */
  uid?: number;
  title: string;
  nickname: string;
  /** HTML of the post body; filled in by `MyScrapper.run()` from the detail page. */
  content: string;
  view: number;
  recommend?: number;
  at_created: string;
  /** URL of the detail page, appended to `Config.host` when the detail page is fetched. */
  detail_url: string;
}

/** Settings that drive a scraper instance. */
export interface Config {
  /** Prefix prepended to every URL that is requested. */
  host: string;
  /** URL of the list page, appended to `host`. */
  url: string;
  /** Selector that matches one element per post on the list page. */
  rows: string;
  /** Not read by the base class — presumably a selector for subclasses; confirm. */
  title: string;
  /** Not read by the base class — presumably a selector for subclasses; confirm. */
  nickname: string;
  /** Selector whose inner HTML on the detail page becomes `BoardStruct.content`. */
  content: string;
}

/**
 * Base class for board scrapers.
 *
 * It downloads the list page, turns every matched row into a `BoardStruct`
 * by handing each `td` cell to the subclass-provided `extractRow`, and then
 * downloads each post's detail page to fill in `content`.
 */
export abstract class MyScrapper {
  /** User-Agent header sent with every request. */
  private static readonly USER_AGENT =
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";

  /** When true, progress information is written to the console. */
  protected debug: boolean;
  protected config: Config;

  /**
   * @param config - Host, list URL and selectors to use.
   * @param debug - Enables console diagnostics; off by default.
   */
  constructor(config: Config, debug: boolean = false) {
    this.config = config;
    this.debug = debug;
  }

  /**
   * Abstract hook: folds one table cell into the row being built.
   *
   * @param row - The row accumulated from the cells seen so far.
   * @param element - The current `td` element.
   * @param cnt - Zero-based position of the cell within its row.
   * @returns The row to carry forward to the next cell.
   */
  protected abstract extractRow(row: BoardStruct, element: cheerio.Element, cnt: number): BoardStruct;

  /**
   * Downloads `config.host + url`.
   *
   * @param url - URL appended to the configured host.
   * @returns The response body, or `null` when the request fails (the error is logged).
   */
  private async getHTML(url: string): Promise<string | null> {
    try {
      // The <string> argument is a compile-time assumption that the server
      // answers with text/HTML; axios performs no runtime check of it.
      const { data } = await axios.get<string>(this.config.host + url, {
        headers: { "User-Agent": MyScrapper.USER_AGENT },
      });

      if (this.debug) {
        console.log("getHTML:" + url + "=>길이:" + data.length);
      }
      return data;
    } catch (error) {
      console.error(`Failed to fetch ${url}:`, error);
      return null;
    }
  }

  /**
   * Downloads a detail page and returns the inner HTML of the first element
   * matching `selector`.
   *
   * @returns The inner HTML, or `''` when the download fails or nothing matches.
   */
  private async extractDetail(url: string, selector: string): Promise<string> {
    const html = await this.getHTML(url);
    if (!html) return "";

    const $ = cheerio.load(html);
    return $(selector).html() ?? "";
  }

  /**
   * Builds one `BoardStruct` from a list row by passing each descendant `td`
   * cell, in document order, through `extractRow`.
   */
  private getListRow(element: cheerio.Cheerio<cheerio.Element>): BoardStruct {
    const emptyRow: BoardStruct = {
      title: "",
      nickname: "",
      content: "",
      at_created: "",
      view: 0,
      recommend: 0,
      detail_url: "",
    };

    // Find the td cells and extract the data from each one; the array index
    // is the zero-based cell position that extractRow expects.
    const row = element
      .find("td")
      .toArray()
      .reduce<BoardStruct>((acc, tdElement, index) => this.extractRow(acc, tdElement, index), emptyRow);

    if (this.debug) {
      console.log("Row:", row);
    }
    return row;
  }

  /**
   * Parses the list page and converts every element matching `selector`
   * into a `BoardStruct`.
   */
  private async getListRows(html: string, selector: string): Promise<BoardStruct[]> {
    const $ = cheerio.load(html);

    // Diagnostics for inspecting the HTML structure.
    if (this.debug) {
      console.log("전체 HTML:", $.html().substring(0, 500)); // first 500 characters only
      console.log("검색된 요소 수:", $(selector).length);

      // Print the id of every div on the page that has one.
      $("div[id]").each((_, el) => {
        console.log("발견된 div id:", $(el).attr("id"));
      });
    }

    // Locate the row elements with the selector and convert each one.
    return $(selector)
      .toArray()
      .map((element) => this.getListRow($(element)));
  }

  /**
   * Scrapes the configured list page and the detail page of every row.
   *
   * Detail pages are requested one at a time, in list order.
   *
   * @returns The scraped posts, or an empty array when the list page cannot be fetched.
   */
  public async run(): Promise<BoardStruct[]> {
    const html = await this.getHTML(this.config.url);
    if (!html) return [];

    const rows = await this.getListRows(html, this.config.rows);
    for (const row of rows) {
      // A row without a detail link would otherwise make us request the bare
      // host and scrape the site root as if it were the post body.
      row.content = row.detail_url ? await this.extractDetail(row.detail_url, this.config.content) : "";
    }
    return rows;
  }
}