XMLHTTP抓取远程数据的后期处理

发表于:2007-06-30来源:作者:点击数: 标签:
% @#作者信息: @#昵称:小灰 @#QQ:103895 @#http://asp2004.net @#http://blog.csdn.net/iuhxq hehe = Hello(http://mmsg.qq.com/cgi-bin/gddylist?Type=13Sort=1Page=3, html, /html, .*( td width=35% bgcolor=#[\dABCDE]{6}(.*)/td)[.\n]*, font style=font
<%

@#作者信息:
@#昵称:小灰
@#QQ:103895
@#http://asp2004.net
@#http://blog.csdn.net/iuhxq

hehe = Hello("http://mmsg.qq.com/cgi-bin/gddylist?Type=13&Sort=1&Page=3", "<html>", "</html>", ".*(<td width=""35%"" bgcolor=""#[\dABCDE]{6}"">(.*)</td>)[.\n]*", "<font style=""font-size:9pt;"" color=blue>$2</font><br>")
response.Write hehe

Function Hello(strUrl, strStart, strEnd, patrn, replStr)
Str = GetBody(strUrl)
Str = MyMid(Str, strStart, strEnd)
Str = ReplaceTest(patrn, replStr, Str)
Hello = Str
End Function

Function MyMid(Str, strstart, strend)
If strstart = "" Then
i = 0
Else
i = InStr(Str, strstart)
End If
If strend = "" Then
j = Len(Str)
Else
j = InStr(i, Str, strend)
End If
MyMid = Mid(Str, i, j - i + 1)
End Function

Function ReplaceTest(patrn, replStr, str1)
Dim regEx, match, matches
Set regEx = New RegExp
regEx.Pattern = patrn
regEx.IgnoreCase = True
regEx.Global = True
Set matches = regEx.Execute(str1)
For Each match in matches
ReplaceTest = ReplaceTest&regEx.Replace(Match.Value, replStr)
Next
End Function

Function GetBody(Url)
Set objXML = CreateObject("Microsoft.XMLHTTP")
With objXML
.Open "Get", Url, False, "", ""
.SEnd
GetBody = .ResponseBody
End With
GetBody = BytesToBstr(GetBody, "GB2312")
Set objXML = Nothing
End Function

Function BytesToBstr(strBody, CodeBase)
Set objStream = Server.CreateObject("Adodb.Stream")
With objStream
.Type = 1
.Mode = 3
.Open
.Write strBody
.Position = 0
.Type = 2
.Charset = CodeBase
BytesToBstr = .ReadText
.Close
End With
Set objStream = Nothing
End Function
%>
其他调用示例:
hehe = Hello("http://list.mp3.baidu.com/song/A.htm", "<table width=""90%"" border=""0"" align=""center"" cellpadding=""3"" cellspacing=""0"" bgcolor=""#f5f5f5"" >", "<DIV align=center>", ".*(<td width=""20%""><a href="".*\.htm"" target=_blank>)(.*)(</a></td>)[.\n]*", "<font style=""font-size:9pt;"" color=blue>$2</font><br>")



原文转自:http://www.ltesting.net