抓取网页萃取网页内容的代码
发表于:2007-06-30来源:作者:点击数:
标签:
dim sUrl sUrl="http://travel.state.gov/visa/frvi_bulletincurrent.html" Function streamtochar(StrStream) set stream=CreateObject("ADODB.Stream") stream.type=1 stream.Mode=3 stream.Open stream.Write Strstrea…
' Address of the page this script downloads and scans
' (the visa bulletin page on travel.state.gov).
Dim sUrl
sUrl = "http://travel.state.gov/visa/frvi_bulletincurrent.html"
' Decodes a block of raw bytes into a string using the gb2312 charset.
'
' Parameters:
'   StrStream - binary data to decode (getContentByUrl passes the
'               responseBody of an XMLHTTP request).
' Returns:
'   The text produced by reading those bytes back through an ADODB.Stream
'   whose Charset is "gb2312".
Function streamtochar(StrStream)
    ' Keep the stream object local so it does not leak into (or clobber)
    ' a script-level variable of the same name.
    Dim stream
    Set stream = CreateObject("ADODB.Stream")

    stream.Type = 1              ' 1 = binary: accept the raw bytes as-is
    stream.Mode = 3              ' 3 = read/write
    stream.Open
    stream.Write StrStream

    ' Rewind, then switch the same buffer to text mode so that ReadText
    ' decodes the bytes with the chosen charset.
    stream.Position = 0
    stream.Type = 2              ' 2 = text
    stream.Charset = "gb2312"
    streamtochar = stream.ReadText

    stream.Close
    Set stream = Nothing
End Function
' NOTE(review): i is incremented here but is not read anywhere in the visible
' script; this looks like a leftover statement - confirm before removing.
i = i + 1
' Downloads a page with a synchronous HTTP GET and returns its body as text.
'
' Parameters:
'   url - address of the resource to fetch.
' Returns:
'   The response body, decoded by streamtochar (gb2312).
Function getContentByUrl(url)
    ' Local declaration keeps the request object out of script scope.
    Dim XmlHttp
    Set XmlHttp = CreateObject("MSXML2.XMLHTTP")

    ' Third argument False = synchronous: send does not return until the
    ' response has been received.
    XmlHttp.open "GET", url, False
    XmlHttp.send

    ' The original read oXmlHttp.responseBody, a name that is never assigned,
    ' which fails with "Object required". The request object is XmlHttp.
    getContentByUrl = streamtochar(XmlHttp.responseBody)

    Set XmlHttp = Nothing
End Function

' Returns the decoded text of the page at the given address.
' At present this is a straight pass-through to getContentByUrl.
'
' Parameters:
'   url - address of the page to fetch.
' Returns:
'   Whatever getContentByUrl returns for that address.
Function getRealContent(url)
    ' Declared locally so the intermediate value does not become a
    ' script-level variable.
    Dim sContent
    sContent = getContentByUrl(url)
    getRealContent = sContent
End Function

' Download the page named by sUrl.
html = getContentByUrl(sUrl)

' Cut the span between a start marker and an end marker out of the page.
' NOTE(review): both markers are a single space in the published listing.
' The real delimiters (probably HTML fragments) appear to have been lost when
' the article was rendered - restore them before relying on url.
url_start = InStr(html, " ")
url_end = 0
If url_start > 0 Then
    ' Look for the end marker at or after the start marker, never before it.
    url_end = InStr(url_start, html, " ")
End If

' Only slice when both markers were found in a sensible order; otherwise Mid
' would be called with a zero start or a negative length and raise an error.
If url_start > 0 And url_end > url_start Then
    url = Mid(html, url_start, url_end - url_start)
Else
    url = ""
End If

' NOTE(review): the original Replace call was truncated in the published
' listing (only an opening quote survived) and did not parse. Stripping
' double-quote characters is a guess at its intent - confirm.
url = Replace(url, """", "")

' Extract the text that sits between the "Washington, D.C. " anchor and the
' " A. STATUTORY" heading in html, and store it in Date_T.
' NOTE(review): the fixed offsets (+57 after the first marker, -14 before the
' second) are presumably tuned to the exact markup of the page at the time;
' verify them against the current page.
dateAnchorPos = InStr(html, "Washington, D.C. ")
statutoryPos = 0
If dateAnchorPos > 0 Then
    statutoryPos = InStr(dateAnchorPos, html, " A. STATUTORY")
End If

' Default to an empty result. The original applied the offsets even when a
' marker was missing (InStr = 0), which produced a negative length and made
' Mid raise "Invalid procedure call or argument".
Date_T = ""
If dateAnchorPos > 0 And statutoryPos > 0 Then
    Date_start = dateAnchorPos + 57
    Date_end = statutoryPos - 14
    If Date_end > Date_start Then
        Date_T = Mid(html, Date_start, Date_end - Date_start)
    End If
End If
原文转自:http://www.ltesting.net