var http = require("http");
// HTML parsing module used to pull data out of the fetched page.
var cheerio = require("cheerio");

// Page whose course listing is scraped.
var url = "http://www.jcpeixun.com/lesson/1512/";

/**
 * Extracts the chapter/lesson outline from the lesson page markup.
 *
 * Result shape:
 *   [{ chaptersTitle: "", videosData: [{ title: "", id: "", price: "" }] }]
 *
 * @param {string} html - Markup of the lesson page.
 * @returns {Array<Object>} One entry per `.list-collapse` element, each with
 *   `chaptersTitle` and a `videosData` array of `{ title, id, price }`.
 *   `title` and `id` come from `data-lesson-name` / `data-lesson-id` and are
 *   `undefined` when the attribute is absent.
 */
function filterData(html) {
  var $ = cheerio.load(html);
  var courseData = [];

  // A regular function (not an arrow) is required: cheerio binds `this`
  // to the element currently being visited.
  $(".list-collapse").each(function () {
    var chapter = $(this);
    var chaptersData = {
      chaptersTitle: chapter.find(".collapse-head").find("label").text(),
      videosData: []
    };

    chapter.find(".listview5").children("li").each(function () {
      var row = $(this);
      // The `.ml10` element carries the lesson name and id as data attributes.
      var lesson = row.find(".ml10");
      chaptersData.videosData.push({
        title: lesson.attr("data-lesson-name"),
        id: lesson.attr("data-lesson-id"),
        price: row.find(".colblue").text()
      });
    });

    courseData.push(chaptersData);
  });

  return courseData;
}

/**
 * Prints every chapter title followed by its lessons as `title【id】price`.
 * Each entry is logged with a trailing "\n", so the output is separated by
 * blank lines.
 *
 * @param {Array<Object>} courseData - Structure returned by `filterData`.
 */
function printCourseInfo(courseData) {
  courseData.forEach(function (chapter) {
    console.log(chapter.chaptersTitle + "\n");
    chapter.videosData.forEach(function (video) {
      console.log(video.title + "【" + video.id + "】" + video.price + "\n");
    });
  });
}

// Download the page, buffer the body, then parse and print it.
http.get(url, function (res) {
  var html = "";

  // Decode as a stream so a multi-byte character split across two chunks
  // is not corrupted by per-chunk Buffer-to-string conversion.
  res.setEncoding("utf8");

  res.on("data", function (chunk) {
    html += chunk;
  });

  res.on("end", function () {
    printCourseInfo(filterData(html));
  });
}).on("error", function (err) {
  // Without this handler a network failure surfaces as an unhandled
  // 'error' event and terminates the process with a raw stack trace.
  console.error("Failed to fetch " + url + ": " + err.message);
});

// "效果图:" ("result screenshot:") — presumably an image followed here in the source article.
以上就是使用 Node.js 编写爬虫获取数据的相关代码,希望对大家的学习有所帮助。