const https = require('https');
const cheerio = require('cheerio');
const http = require('http');
const fs = require('fs');

/**
 * Download the body of a URL as a UTF-8 string.
 *
 * @param {string} url - Absolute http:// or https:// URL.
 * @returns {Promise<string>} Resolves with the full response body; rejects on
 *   a request or response stream error.
 */
function getHttp(url) {
    return new Promise((resolve, reject) => {
        // http.get refuses https: URLs, so choose the client by protocol.
        const client = String(url).startsWith('https:') ? https : http;
        client
            .get(url, (res) => {
                // Decode in the stream so a multi-byte character split across
                // two chunks is not corrupted by per-chunk string conversion.
                res.setEncoding('utf8');
                let rawData = '';
                res.on('data', (chunk) => {
                    rawData += chunk;
                });
                res.on('end', () => {
                    resolve(rawData);
                });
                res.on('error', reject);
            })
            .on('error', reject);
    });
}

/**
 * Collect the category links from the home page menu.
 *
 * Starts at the first <li> matched by the menu selector and follows `next`
 * siblings for as long as they are <li> nodes, reading the href of each
 * item's second child node.
 *
 * @returns {Promise<string[]>} Category URLs; empty when the menu selector
 *   matches nothing.
 */
async function listURLOf() {
    const html = await getHttp('http://thuoctot24h.com/');
    const $ = cheerio.load(html);
    const listURL1 = [];
    const firstItem = $('#header-row-29 > div > div > div > div > ul > li:nth-child(3) > ul > li:nth-child(1)')[0];
    // Walk with a local cursor instead of overwriting the cheerio selection.
    for (let node = firstItem; node != null && node.name === 'li'; node = node.next) {
        listURL1.push(node.children[1].attribs.href);
    }
    return listURL1;
}

/**
 * Collect the product page links listed on one category page.
 *
 * @param {string} link - URL of the category page.
 * @returns {Promise<string[]>} One href per `div.list-item` element.
 */
async function listURLOf1(link) {
    const html = await getHttp(link);
    const $ = cheerio.load(html);
    const listURL = [];
    $('div.list-item').each((index, item) => {
        // NOTE(review): relies on the exact nesting of the list-item markup.
        listURL.push(item.children[0].children[0].children[1].attribs.href);
    });
    return listURL;
}

/**
 * Gather the text of the consecutive <p> siblings that directly follow a node.
 *
 * @param {object} heading - DOM node whose following siblings are scanned.
 * @returns {string[]} `data` of each paragraph's first child, skipping
 *   paragraphs whose first child has no `data`.
 */
function collectParagraphs(heading) {
    const lines = [];
    let node = heading.next;
    while (node != null && node.name === 'p') {
        const firstChild = node.children[0];
        if (firstChild != null && firstChild.data != null) {
            lines.push(firstChild.data);
        }
        node = node.next;
    }
    return lines;
}

/**
 * Flatten the cells of consecutive <tr> siblings into a single array.
 *
 * @param {object|undefined} firstRow - First row node, or undefined.
 * @returns {Array<string|undefined>} `data` of the first child of every cell,
 *   in document order.
 */
function collectTableCells(firstRow) {
    const cells = [];
    for (let row = firstRow; row != null && row.name === 'tr'; row = row.next) {
        for (const cell of row.children) {
            // Nodes without a children list (e.g. text between cells) hold no cell value.
            if (cell.children == null) continue;
            const firstChild = cell.children[0];
            cells.push(firstChild != null ? firstChild.data : undefined);
        }
    }
    return cells;
}

/**
 * Scrape one product page and serialise its details.
 *
 * @param {string} link - URL of the product page.
 * @returns {Promise<string|undefined>} JSON string with keys ten, gia,
 *   thanhPhan, congDung, doiTuongSD, cachSuDung, thongTinSP; undefined when
 *   the page could not be fetched or parsed (the error is logged).
 */
async function inforProduct(link) {
    try {
        const html = await getHttp(link);
        const $ = cheerio.load(html);

        // Product name
        const tenSP = $('h1.title-name-product')[0].children[0].data;
        console.log(tenSP);

        // Price (null when the page shows none)
        const selecGiaSP = $('span.lb-price.price-main');
        const giaSP = selecGiaSP[0] != null ? selecGiaSP[0].children[0].data : null;

        // One array of paragraph texts per <h2> section, in page order.
        const motChiMuc = [];
        $('#content > h2').each((index, heading) => {
            motChiMuc.push(collectParagraphs(heading));
        });

        // Ingredients: taken from the table when present, otherwise from the second section.
        let listThanhPhan;
        if ($('#content > table')[0] != null) {
            const firstRow = $('#content > table>tbody>tr:nth-child(1)')[0];
            listThanhPhan = [collectTableCells(firstRow)];
        } else {
            listThanhPhan = motChiMuc[1];
        }

        const data = {
            ten: tenSP,
            gia: giaSP,
            thanhPhan: listThanhPhan,
            congDung: motChiMuc[0],
            doiTuongSD: motChiMuc[2],
            cachSuDung: motChiMuc[3],
            thongTinSP: motChiMuc[4]
        };
        const str = JSON.stringify(data);
        console.log(str);
        console.log('Mot san pham-----------');
        return str;
    } catch (error) {
        // Log the failing link; the previous handler logged a variable that
        // does not exist in this scope and therefore threw inside the catch.
        console.log(link);
        console.log(error);
        return undefined;
    }
}

/**
 * Crawl every category and every product page, one request at a time.
 *
 * @returns {Promise<string[]>} JSON strings of the products that were scraped
 *   successfully.
 */
async function main() {
    const inforAll = [];
    const list = await listURLOf();
    const list01 = [];
    for (const categoryLink of list) {
        list01.push(await listURLOf1(categoryLink));
    }
    // for...of so each await is honoured; an async forEach callback is not awaited.
    for (const motPhanTu of list01) {
        for (const productLink of motPhanTu) {
            const thongTinMotSanPham = await inforProduct(productLink);
            if (thongTinMotSanPham !== undefined) {
                inforAll.push(thongTinMotSanPham);
            }
        }
    }
    return inforAll;
} //end main

main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});