初学nodejs爬虫,记录一下:
// Scraper script: downloads a fixed list of detail pages, extracts the
// "label:value" fields found on each page, and writes all records to
// hrefInView.json once every request has finished.
var request = require('request');
var iconv = require('iconv-lite'); // transcoding: the pages are decoded as GB2312
var cheerio = require('cheerio');
var fs = require("fs");

// Site root that every entry of hrefArr is appended to.
var BASE_URL = "http://www.yinghexinxi.cn/";

// Per-request timeout in milliseconds. The original script simply gave up
// after a fixed 6 second sleep; the same budget is now applied per request so
// that a hanging connection cannot keep the output file from being written.
var REQUEST_TIMEOUT_MS = 6000;

// Keys of an output record, in the order in which the matching
// [height='22'] elements are read from the page.
var FIELD_NAMES = ['name', 'tel', 'qq', 'date', 'area', 'email', 'location', 'ip', 'phone'];

// Relative paths of the pages to scrape.
var hrefArr = [
  'categories/2017627968.html',
  'categories/2017627967.html',
  'categories/2017627966.html',
  'categories/2017627965.html',
  'categories/2017627964.html',
  'categories/2017627963.html',
  'categories/2017627962.html',
  'categories/2017627961.html',
  'categories/2017627960.html',
  'categories/2017627977.html',
  'categories/2017627976.html',
  'categories/2017627975.html',
  'categories/2017627974.html',
  'categories/2017627973.html',
  'categories/2017627972.html',
  'categories/2017627971.html',
  'categories/2017627970.html',
  'categories/2017627969.html',
  'categories/2017627978.html',
  'categories/2017627983.html',
  'categories/2017627986.html',
  'categories/2017627985.html',
  'categories/2017627984.html',
  'categories/2017627982.html',
  'categories/2017627981.html',
  'categories/2017627980.html',
  'categories/2017627979.html',
  'categories/2017627994.html',
  'categories/2017627993.html',
  'categories/2017627992.html',
  'categories/2017627991.html',
  'categories/2017627990.html',
  'categories/2017627989.html',
  'categories/2017627988.html',
  'categories/2017627987.html',
  'categories/2017627950.html',
  'categories/20176281003.html',
  'categories/20176281002.html',
  'categories/20176281001.html',
  'categories/20176281000.html',
  'categories/2017628999.html',
  'categories/2017628998.html',
  'categories/2017628997.html',
  'categories/2017628996.html',
  'categories/2017628995.html',
  'categories/20176281012.html',
  'categories/20176281011.html',
  'categories/20176281010.html',
  'categories/20176281009.html',
  'categories/20176281008.html',
  'categories/20176281007.html',
  'categories/20176281006.html',
  'categories/20176281005.html',
  'categories/20176281004.html',
  'categories/20176281022.html',
  'categories/20176281021.html',
  'categories/20176281020.html',
  'categories/20176281019.html',
  'categories/20176281018.html',
  'categories/20176281017.html',
  'categories/20176281016.html',
  'categories/20176281015.html',
  'categories/20176281014.html',
  'categories/20176281031.html',
  'categories/20176281030.html',
  'categories/20176281029.html',
  'categories/20176281028.html',
  'categories/20176281027.html',
  'categories/20176281026.html',
  'categories/20176281025.html',
  'categories/20176281024.html',
  'categories/20176281023.html',
  'categories/2017627959.html',
  'categories/2017627958.html',
  'categories/2017627957.html',
  'categories/2017627956.html',
  'categories/2017627955.html',
  'categories/2017627954.html',
  'categories/2017627952.html',
  'categories/2017627951.html',
  'categories/2017627949.html'
];

// Scraped records, filled in hrefArr order once all requests have completed.
var viewInfo = [];

/**
 * Returns the part of a "label:value" string that follows the first colon.
 *
 * Everything after the first colon is kept, so values that themselves contain
 * a colon (for example a time of day) are no longer cut short.
 *
 * @param {string} text - Trimmed text content of one page element.
 * @returns {string|undefined} The value part, or undefined when the text
 *   contains no colon.
 */
function extractValue(text) {
  var separatorAt = text.indexOf(":");
  return separatorAt === -1 ? undefined : text.slice(separatorAt + 1);
}

/**
 * Builds one output record from a downloaded page.
 *
 * @param {Buffer} body - Raw response body (requested with encoding: null).
 * @param {string} path - Relative path of the page; its digits become the id.
 * @returns {Object} Record with an id plus the FIELD_NAMES keys. Fields the
 *   page does not provide are undefined and are therefore omitted by
 *   JSON.stringify.
 */
function parseDetailPage(body, path) {
  var html = iconv.decode(body, 'gb2312');
  var $ = cheerio.load(html);
  var values = [];

  $("[height='22']").each(function (index, element) {
    values.push(extractValue($(element).text().trim()));
  });

  // The id is taken from this page's own path. The original read a loop
  // variable shared by all callbacks, so every record received the id of the
  // last URL in the list.
  var record = { id: path.replace(/[^0-9]/g, "") };
  FIELD_NAMES.forEach(function (field, position) {
    record[field] = values[position];
  });
  return record;
}

/**
 * Serializes viewInfo and writes it to hrefInView.json.
 */
function saveResults() {
  var data = JSON.stringify(viewInfo);
  console.log("准备写入文件");
  fs.writeFile('hrefInView.json', data, function (err) {
    if (err) {
      return console.error(err);
    }
    console.log("数据写入成功!");
  });
}

/**
 * Requests every page in hrefArr in parallel and saves the results after the
 * last response (or failure) has arrived, instead of after a fixed delay.
 * Failed pages are logged and skipped.
 */
function scrapeAll() {
  var recordsByIndex = [];
  var pending = hrefArr.length;

  function finishOne() {
    pending -= 1;
    if (pending > 0) {
      return;
    }
    // forEach skips the holes left by failed requests, so viewInfo ends up
    // holding only successful records, in hrefArr order.
    recordsByIndex.forEach(function (record) {
      viewInfo.push(record);
    });
    saveResults();
  }

  if (pending === 0) {
    saveResults();
    return;
  }

  hrefArr.forEach(function (path, index) {
    var href = BASE_URL + path;
    var options = { url: href, encoding: null, timeout: REQUEST_TIMEOUT_MS };

    request.get(options, function (err, response, body) {
      if (err) {
        console.error(href, err);
      } else if (response.statusCode !== 200) {
        console.error(href, "unexpected HTTP status " + response.statusCode);
      } else {
        recordsByIndex[index] = parseDetailPage(body, path);
      }
      finishOne();
    });
  });
}

scrapeAll();