"1、输入url目标网页地址,返回值getHTTPPage是目标网页的html代码 function getHTTPPage(url) dim Http set Http=server.createobject("MSXML2.XMLHTTP" Http.open "GET",url,false Http.send() if Http.readystate<>4 then exit function end if getHTTPPage=bytesToBSTR(Http.responseBody,"GB2312" set http=nothing if err.number<>0 then err.Clear end function
"2、转换乱玛,直接用xmlhttp调用有中文字符的网页得到的将是乱玛,可以通过adodb.stream组件进行转换 Function BytesToBstr(body,Cset) dim objstream set objstream = Server.CreateObject("adodb.stream" objstream.Type = 1 objstream.Mode =3 objstream.Open objstream.Write body objstream.Position = 0 objstream.Type = 2 objstream.Charset = Cset BytesToBstr = objstream.ReadText objstream.Close set objstream = nothing End Function
"下面试着调用http://wmjie.51.net/swords的html内容 Dim Url,Html Url="http://wmjie.51.net/swords/" Html = getHTTPPage(Url) Response.write Html %>
" Add a header to give it a file name: Response.AddHeader "Content-Disposition", _ "attachment;filename=mitchell-pres.zip"
" Specify the content type to tell the browser what to do: Response.ContentType = "application/zip"
" Binarywrite the bytes to the browser Response.BinaryWrite xml.responseBody
Set xml = Nothing %>
------------------------------------- 如何写ASP入库小偷程序 入库小偷的原理也很简单:就是用XMLHTTP远程读取网页的内容,然后根据需要,对读到的内容进行加工(过滤,替换,分类),最后得到自己需要的数据,加入到数据库中。 首先:我们先用XMLHTTP读取远程网页(我的另一片文章中有介绍)。 其次:对内容进行过滤,这个是比较关键的步骤,比如说,我要从远程网页上提取出所有url连接,我应该怎么做呢? 代码: ‘这里用的是正则式 Set objRegExp = New Regexp "建立对象 objRegExp.IgnoreCase = True "大小写忽略 objRegExp.Global = True "全局为真 objRegExp.Pattern = "http://.+?" "匹配字段 set mm=objRegExp.Execute(str) "执行查找,str为输入参数 For Each Match in mm "进入循环 Response.write(Match.Value) "输出url地址 next
然后,我们需要根据需要做一些替换功能,把不必要的数据替换掉,这个比较简单,用Replace函数即可。 最后,进行数据库操作 ------------------------------- 一个例子 代码: <% On Error Resume Next Server.ScriptTimeOut=9999999 Function getHTTPPage(Path) t = GetBody(Path) getHTTPPage=BytesToBstr(t,"GB2312" End function
Function GetBody(url) on error resume next Set Retrieval = CreateObject("Microsoft.XMLHTTP" With Retrieval .Open "Get", url, False, "", "" .Send GetBody = .ResponseBody End With Set Retrieval = Nothing End Function
"然后调用XMLHTTP组件创建一个对象并进行初始化设置。
Function BytesToBstr(body,Cset) dim objstream set objstream = Server.CreateObject("adodb.stream" objstream.Type = 1 objstream.Mode =3 objstream.Open objstream.Write body objstream.Position = 0 objstream.Type = 2 objstream.Charset = Cset BytesToBstr = objstream.ReadText objstream.Close set objstream = nothing End Function
Function Newstring(wstr,strng) Newstring=Instr(lcase(wstr),lcase(strng)) if Newstring<=0 then Newstring=Len(wstr) End Function
"处理抓取回来的数据需要调用adodb.stream组件并进行初始化设置。%>
"以下即为页面显示部分
<% Dim wstr,str,url,start,over,city "定义一些需要使用到的变量
city = Request.QueryString("id") "程序传回的ID变量(即用户选择的城市)赋给id
body = replace(body,"skin1","天气预报 - 斯克网络") body = replace(body,"http://appnews.qq.com/cgi-bin/news_qq_search?city","tianqi.asp?id")
"本程序中已经完成了替换的工作,如果有其他需要的话可以继续进行类似的替换操作。
response.write body 引用: 远程获取内容,并将内容存在本地电脑上,包括任何文件
<% "----------远程获取内容,并将内容存在本地电脑上,包括任何文件!---------- "On Error Resume Next "Set the content type to the specific type that you are sending. "Response.ContentType = "IMAGE/JPEG" "-------------------------------定义输出格式-----------------------------
Path=request.querystring("p") sPath = Path if left(lcase(path),7) <> "http://"; then "-------------如果前面没有http就是本地文件,交给LocalFile处理------------ LocalFile(path) else "--------------------否则为远程文件,交给RemoteFile处理------------------ RemoteFile(Path) end if "Response.Write err.Description
sub LocalFile(Path) "-------------------如果为本地文件则简单的跳转到该页面------------------- Response.Redirect Path End Sub
Sub RemoteFile(sPath) "-------------------------处理远程文件函数------------------------------ FileName = GetFileName(sPath) "-------------GetFileName为把地址转换为合格的文件名过程------------- FileName = Server.MapPath("/UploadFile/Cache/" & FileName) Set objFso = Server.CreateObject("Scripting.FileSystemObject") "Response.Write fileName if objFso.FileExists(FileName) Then "--------------检查文件是否是已经访问过,如是,则简单跳转------------ Response.Redirect "/uploadfile/cache/" & GetFileName(path) Else "----------------否则的话就先用GetBody函数读取---------------------- "Response.Write Path t = GetBody(Path) "-----------------用二进制方法写到浏览器上-------------------------- Response.BinaryWrite t Response.Flush "-----------------输出缓冲------------------------------------------ SaveFile t,GetFileName(path) "------------------将文件内容缓存到本地路径,以待下次访问----------- End if Set objFso = Nothing End Sub
Function GetBody(url) "-----------------------本函数为远程获取内容的函数--------------------- "on error resume next "Response.Write url Set Retrieval = CreateObject("Microsoft.XMLHTTP") "----------------------建立XMLHTTP对象----------------------------- With Retrieval .Open "Get", url, False, "", "" "------------------用Get,异步的方法发送----------------------- .Send "GetBody = .ResponseText GetBody = .ResponseBody "------------------函数返回获取的内容-------------------------- End With Set Retrieval = Nothing "response.Write err.Description End Function
Function GetFileName(str) "-------------------------本函数为合格化的文件名函数------------------- str = Replace(lcase(str),"http://";,"") str = Replace(lcase(str),"//","/") str = Replace(str,"/","") str = replace(str,vbcrlf,"") GetFileName = str End Function
sub SaveFile(str,fName) "-------------------------本函数为将流内容存盘的函数------------------- "on error resume next Set objStream = Server.CreateObject("ADODB.Stream") "--------------建立ADODB.Stream对象,必须要ADO 2.5以上版本--------- objStream.Type = adTypeBinary "-------------以二进制模式打开------------------------------------- objStream.Open objstream.write str "--------------------将字符串内容写入缓冲-------------------------- "response.Write fname objstream.SaveToFile "c:inetpubmywebuploadfilecache" & fName,adSaveCreateOverWrite "--------------------将缓冲的内容写入文件-------------------------- "response.BinaryWrite objstream.Read objstream.Close() set objstream = nothing "-----------------------关闭对象,释放资源------------------------- "response.Write err.Description End sub %>