Extracting data from the WWW with ASP pages

Msgbox GetData("http://www.yahoo.com/index.html")

Function GetData(strUrl) 'As String
'Uses Internet Explorer to return a string containing the 
'contents of an http or ftp web page. This returns the worst
'quality of data but is the most likely to be supported 
'without installing anything.
Dim web 'As InternetExplorer.Application
Dim doc 'As InternetExplorer.Document
Dim strWebPage 'As String
    Set web = CreateObject("InternetExplorer.Application")
    web.Navigate strUrl
    Do While web.Busy
    Loop
    On Error Resume Next
    Set doc = Nothing
    Do Until Not doc Is Nothing
        Set doc = web.Document
    Loop
    strWebPage = doc.body.OuterHTML 'This does not return the head or title sections.
    web.Quit
    GetData = strWebPage
End Function

function getURL(aURL, anyPostData, anyUserName, anyPassword) 
DIM objSrvHTTP,web,method,s
	on error resume next
	s=""
	set objSrvHTTP = Server.CreateObject ("Msxml2.ServerXMLHTTP.3.0")
	if anyPostData=empty then
		objSrvHTTP.open "GET",aURL, true, anyUsername, anyPassword
	else
		objSrvHTTP.open "POST",aURL, true, anyUsername, anyPassword
		objSrvHTTP.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
		end if
	objSrvHTTP.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"
	objSrvHTTP.send anyPostData
	objSrvHTTP.waitForResponse 5
	select case objSrvHTTP.readyState
		case 0 'object created, but no URL opened
			debug "getURL("&aURL&", "&anyPostData&", "&anyUserName&", "&anyPassword&"): Object Created, no URL opened"
			err.raise 1, "Object Created, no URL opened"
			exit function
		case 1	'loading: URL opened, but no data sent
			debug "getURL("&aURL&", "&anyPostData&", "&anyUserName&", "&anyPassword&"):URL opened, no data sent"
			err.raise 2, "URL opened, no data sent"
			exit function
		case 2	'loaded: data sent, status and headers available, no response recieved.
			debug "getURL("&aURL&", "&anyPostData&", "&anyUserName&", "&anyPassword&"):No response from remote host"
			err.raise 3, "No response from remote host"
			exit function
		case 3	'interactive: some data recieved. responseBody and responseText will return partial results.
			debug "getURL("&aURL&", "&anyPostData&", "&anyUserName&", "&anyPassword&"):Partial response recieved:"
			debug server.htmlencode(objSrvHTTP.responseText)
			s = objSrvHTTP.responseText
			err.raise 4, "Partial response recieved"
		case 4	'complete: 
			s = objSrvHTTP.responseText
		end select
	getURL = s
	end function


Function GetData(strUrl)
'Uses the Microsoft WinHttp object from:
'http://download.microsoft.com/download/MsHttp50/Install/5.0/NT45XP/EN-US/winhttp50sdk.exe
Dim web, strWebPage
Const WinHttpRequestOption_UserAgentString = 0
Const WinHttpRequestOption_EscapePercentInURL = 3
Const WinHttpRequestOption_EnableRedirects = 6
Const WinHttpRequestOption_UrlEscapeDisable = 7
Const WinHttpRequestOption_UrlEscapeDisableQuery = 8
    Set web = CreateObject("WinHttp.WinHttpRequest.5")
    web.Option(WinHttpRequestOption_EnableRedirects) = True
    web.Open "GET", strURL, False
    web.Send
    If web.Status = "200" Then
        strWebPage = web.ResponseText
        'Try to follow META redirect pages
        If InStr(1, strWebPage, "<META HTTP-EQUIV=""REFRESH"" CONTENT=""", vbTextCompare) <> 0 Then
            strWebPage = Mid(strWebPage, InStr(1, strWebPage, "<META HTTP-EQUIV=""REFRESH"" CONTENT=""", vbTextCompare))
            strWebPage = Mid(strWebPage, InStr(1, strWebPage, "URL=", vbTextCompare) + 4)
            strWebPage = Left(strWebPage, InStr(strWebPage, ">") - 1)
            Do While InStr(strWebPage, """") <> 0 : strWebPage = Replace(strWebPage, """", "") : Loop
            strWebPage = Trim(strWebPage)
            If InStr(strWebPage, " ") <> 0 Then strWebPage = Left(strWebPage, InStr(strWebPage, " ") - 1)
            strWebPage = MakeAbsolute(strWebPage, strUrl)
            'Change the input argument to notify the calling routine
            strUrl = strWebPage
            strWebPage = GetData(strWebPage)
        End If
        GetData = strWebPage
    Else
        GetData = ""
    End If
End Function

Function GetData(strUrl) 'As String
'Uses POST.EXE from http://www.ericphelps.com/webget/post.zip
'Downloads an http web page and returns a string containing the contents
Dim ts 'As Scripting.TextStream
Dim fs 'As Scripting.FileSystemObject
Dim web 'As Post.clsPost
Dim strWebPage 'As String
Dim strWebSite 'As String
Dim strResource 'As String
    If left(strUrl,7) = "http://" Then
        strWebSite = Mid(strUrl, 8)
    Else
        strWebSite = strUrl
    End If
    If Instr(strWebSite, "/") = 0 Then strWebSite = strWebSite & "/"
    strResource = Mid(strWebSite, Instr(strWebSite, "/"))
    strWebSite = Left(strWebSite, Instr(strWebSite, "/") - 1)
    Set web = CreateObject("Post.clsPost")
    web.DataTimeout = 120
    web.SocketTimeout = 60
    'Get it! (Force string so VBS doesn't try to pass a string-type variant instead)
    strWebPage = web.GetHeader(Cstr(strWebSite), Cstr(strResource))
    'Check to see if we got content
    If ((Len(Mid(strWebPage, Instr(strWebPage, vbCrlf & vbCrlf) + 4)) = 0) Or (Left(strWebPage, 11) = "HTTP/1.1 30")) Then
        If InStr(strWebPage, "Location:") <> 0 Then
            'Recursive follow of http redirect. Trim the new location out of the response
            strWebPage = Mid(strWebPage, InStr(strWebPage, "Location:") + 9)
            strWebPage = Trim(strWebPage)
            strWebPage = Left(strWebPage, InStr(strWebPage, vbCr) - 1)
            'Change the input argument to notify the calling routine
            strUrl = strWebPage
            strWebPage = GetData(strWebPage)
        Else
            strWebPage = ""
        End If
    Else
        'Recursive follow of meta tag redirect
        If InStr(1, strWebPage, "<META HTTP-EQUIV=""REFRESH"" CONTENT=""", vbTextCompare) <> 0 Then
            strWebPage = Mid(strWebPage, InStr(1, strWebPage, "<META HTTP-EQUIV=""REFRESH"" CONTENT=""", vbTextCompare))
            strWebPage = Mid(strWebPage, InStr(1, strWebPage, "URL=", vbTextCompare) + 4)
            strWebPage = Left(strWebPage, InStr(strWebPage, ">") - 1)
            Do While InStr(strWebPage, """") <> 0 : strWebPage = Replace(strWebPage, """", "") : Loop
            strWebPage = Trim(strWebPage)
            If InStr(strWebPage, " ") <> 0 Then strWebPage = Left(strWebPage, InStr(strWebPage, " ") - 1)
            strWebPage = MakeAbsolute(strWebPage, "http://" & strWebSite & strResource)
            'Change the input argument to notify the calling routine
            strUrl = strWebPage
            strWebPage = GetData(strWebPage)
        Else        
            'Remove the header
            strWebPage = Mid(strWebPage, Instr(strWebPage, vbCrlf & vbCrlf) + 4)
        End If
    End If
    GetData = strWebPage
End Function

Function GetData(strUrl) 'As String
'Uses MSINET.OCX from http://activex.microsoft.com/controls/vb5/msinet.cab 
'Requires a Microsoft developer's license. May require license fix from 
'ftp://ftp.microsoft.com/softlib/mslfiles/vbc.exe
'Returns a string containing the contents of a web page
Dim web 'As InetCtls.Inet
Dim strWebPage 'As String
    Set web = CreateObject("InetCtls.Inet")
    web.RequestTimeout = 60
    strWebPage = web.OpenURL(Cstr(strUrl))
    GetData = strWebPage
End Function

Function GetData(strUrl) 'As String
'Uses WGET.EXE from http://wget.sunsite.dk/
'to return a string containing the contents of an http web page
Dim ts 'As Scripting.TextStream
Dim wsh 'As Wscript.Shell
Dim fs 'As Scripting.FileSystemObject
Dim fil 'As Scripting.File
Dim strWebPage 'As String
Dim strTempFile 'As String
Const ForReading = 1
Const ForWriting = 2
Const TemporaryFolder = 2
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set wsh = CreateObject("Wscript.Shell")
    strTempFile = fs.BuildPath(fs.GetSpecialFolder(TemporaryFolder), fs.GetBaseName(Wscript.ScriptFullName) & ".tmp")
    Set ts = fs.CreateTextFile(strTempFile, True)
    ts.Close
    Set fil = fs.GetFile(strTempFile)
    wsh.Run "wget.exe -O " & fil.ShortPath & " " & strUrl, 0, True
    Set ts = fs.OpenTextFile(strTempFile, ForReading, True)
    strWebPage = ts.ReadAll
    ts.Close
    fil.Delete True
    GetData = strWebPage
End Function

Function GetData(strUrl) 'As String
'This is not my original script, but is one of the public domain
'scripts included as a sample with PrimalSCRIPT. Credit for the
'script is attributed to "Eric K." and "Michael Harris". This
'script does a BINARY download. I've modified it here to suit
'my purposes. The "Chrw-Ascw-Chr-Ascb-Midb" is what is needed to
'perform a byte array to string conversion! Keep it in mind.
Dim xml 'As MSXML2.XMLHTTP
Dim strWebPage 'As String
Dim strTemp 'As String
Dim lngCounter 'As Long
    Set xml = CreateObject("MSXML2.XMLHTTP")
    xml.Open "GET", strUrl, False 'false tells it to wait for the reply. Will hang if no reply.
    xml.Send
    strTemp = xml.ResponseBody	'also responseText for non-binary
    Set xml = Nothing
    For lngCounter = 0 to UBound(strTemp)
	strWebPage = strWebPage & Chrw(Ascw(Chr(Ascb(Midb(strTemp,lngCounter+1,1)))))
    Next
    GetData = strWebPage
End Function

Function GetData(strUrl) 'As String
'Uses WEBGET.EXE from http://www.ericphelps.com/webget/get.zip
'Downloads an http web page and returns a string containing the contents
Dim web 'As WebGet.Web
Dim sRxData 'As String
    Set web = CreateObject("WebGet.Web")
    If Left(strUrl, 7) <> "http://" Then strUrl = "http://" & strUrl
    web.URL = strUrl
    sRxData = web.GetText
    GetData = sRxData
End Function

See also:

http://www.google.com/search?q=javascript+http+request+scrape&hl=en&lr=lang_en&safe=off&start=10&sa=N

Questions:

Comments:

Interested:

Code: