Tuesday 25 September 2007

Extract data from Websites.

I was'nt knowing that this can be done until I got the requirement to do this job.

good example for login in to rediff is here http://www.codeproject.com/useritems/kirangoka.asp

My code:

Imports mshtml
Imports System.Xml
Imports System.IO

Imports System.Data
Imports System.Data.SqlClient



Public Class getInspectorDetails

Const conStr As String = "Data Source=myservername1;Initial Catalog=Aslam_Test;User Id=sa; Password=mypass;"
'Dim conStrPostCode As String = "Data Source=myservername1;Initial Catalog=Common;User Id=sa; Password=mypassword2;"
Dim conn As SqlConnection = New SqlConnection(conStr)
Dim IntroducerName As String = ""

Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
On Error Resume Next
Dim CertNum As String
Dim dr As SqlDataReader

If conn.State = ConnectionState.Closed Then
With conn
.Open()
End With
End If

Dim cmd As New SqlCommand
Dim sqlStr As String
cmd.Connection = conn
sqlStr = "Select distinct(Accreditation_Certification_Number) FROM [Aslam_Test].[dbo].[AssessorInspector]"
Accreditation_Certification_Number FROM InspectorDetails)"
sqlStr = sqlStr + " order by [Accreditation_Certification_Number]"

cmd.CommandText = sqlStr
cmd.CommandType = CommandType.Text
dr = cmd.ExecuteReader()

While dr.Read()
CloseAll()
CertNum = dr("Accreditation_Certification_Number")
Dim wbBrowser As New SHDocVw.InternetExplorer
'wbBrowser.Visible = True
wbBrowser.Visible = False
wbBrowser.Navigate("https://www.hcrregister.com/FindAssessorInspector", Nothing, Nothing, Nothing, Nothing)

Do
Loop Until Not wbBrowser.Busy

SearchForInspectorByCertNum(wbBrowser, CertNum)
ClickInspectorName(wbBrowser)
ReadAndRecordInspectorDetails(IntroducerName, CertNum)
wbBrowser.Quit()
wbBrowser = Nothing
End While

dr.Close()
dr = Nothing
cmd = Nothing

conn.Close()
conn = Nothing
MsgBox("Updated Database from HCR")
End Sub

Public Sub SearchForInspectorByCertNum(ByRef wbBrowser As SHDocVw.InternetExplorer, ByVal CertNum As String)


Dim HTMLDoc As mshtml.HTMLDocument

Do
Loop Until Not wbBrowser.Busy

HTMLDoc = wbBrowser.Document

Dim iHTMLCol As IHTMLElementCollection
Dim iHTMLEle As IHTMLElement
Dim str As String

iHTMLCol = HTMLDoc.getElementsByTagName("input")


' Type the user name in the username text box
For Each iHTMLEle In iHTMLCol
If Not iHTMLEle.getAttribute("name") Is Nothing Then
str = iHTMLEle.getAttribute("name").ToString
If str = "hiCertNumber" Then
iHTMLEle.setAttribute("value", CertNum)
Exit For
End If
End If
Next

' Press the submit button
For Each iHTMLEle In iHTMLCol
If Trim(iHTMLEle.outerHTML) = "" Then
iHTMLEle.click()
Exit For
End If
Next

Do
Loop Until Not wbBrowser.Busy
End Sub

'click on inspector link
Public Sub ClickInspectorName(ByRef wbBrowser As SHDocVw.InternetExplorer)

Dim HTMLDoc As mshtml.HTMLDocument

Do
Loop Until Not wbBrowser.Busy

HTMLDoc = wbBrowser.Document

Dim iHTMLCol As IHTMLElementCollection
iHTMLCol = HTMLDoc.links
iHTMLCol.item(13).click() 'click on link

IntroducerName = iHTMLCol.item(13).innerhtml

Do
Loop Until Not wbBrowser.Busy
End Sub

Public Sub ReadAndRecordInspectorDetails(ByVal name As String, ByVal CertNum As String)
On Error Resume Next
Dim wbBrowser As New SHDocVw.InternetExplorer

Dim winds As New SHDocVw.ShellWindows()
System.Threading.Thread.Sleep(2500)
Dim noIE As Boolean = False
For Each ie As SHDocVw.InternetExplorer In winds
If ie.MenuBar = False Then
'If ie.LocationURL = "https://www.hcrregister.com/FindAssessorInspector" And CInt(ie.Height) = 583 And CInt(ie.Width) = 512 Then
wbBrowser = ie
noIE = True
Exit For
'Else
' ie.Quit()
End If
Next

If noIE = True Then
Dim HTMLDoc As mshtml.HTMLDocument
Do
Loop Until Not wbBrowser.Busy

HTMLDoc = wbBrowser.Document
Dim iHTMLCol As IHTMLElementCollection 'for rows
Dim cell As IHTMLElement

iHTMLCol = HTMLDoc.getElementsByTagName("tr")

Dim i As HTMLTableRow
Dim iCountRow As Integer
Dim Inspectors(,) As String
ReDim Inspectors(iHTMLCol.length - 1, 4)

Dim tmpArr() As String

For Each i In iHTMLCol
cell = i.cells.item(0)
If Not i.cells.item(1) Is Nothing Then

If cell.innerText Is Nothing Then
Inspectors(iCountRow, 0) = ""
Else
tmpArr = cell.innerText.Split(". ")
Inspectors(iCountRow, 0) = Trim(tmpArr(tmpArr.Length - 1))
End If

cell = i.cells.item(1)
If cell.innerText Is Nothing Then
Inspectors(iCountRow, 1) = ""
Else
Inspectors(iCountRow, 1) = cell.innerText
End If
iCountRow = iCountRow + 1
End If
Next

If iCountRow > 0 Then
Dim connNew As SqlConnection = New SqlConnection(conStr)
If connNew.State = ConnectionState.Closed Then
With connNew
.Open()
End With
End If

Dim cmdInspectorInsert As New SqlCommand
cmdInspectorInsert.Connection = connNew
cmdInspectorInsert.CommandType = CommandType.StoredProcedure
cmdInspectorInsert.CommandText = "sp_InspectorDetails_save"
cmdInspectorInsert.Parameters.Clear()
cmdInspectorInsert.Parameters.Add("Name", SqlDbType.VarChar).Value = name
cmdInspectorInsert.Parameters.Add("Accreditation_Certification_Number", SqlDbType.VarChar).Value = Inspectors(0, 1)
cmdInspectorInsert.Parameters.Add("Status", SqlDbType.VarChar).Value = Inspectors(1, 1)
cmdInspectorInsert.Parameters.Add("Accreditation_Certification_Date", SqlDbType.DateTime).Value = Inspectors(2, 1)
cmdInspectorInsert.Parameters.Add("CompanyName", SqlDbType.VarChar).Value = Inspectors(3, 1)
cmdInspectorInsert.Parameters.Add("PostcodeCoverage", SqlDbType.VarChar).Value = Inspectors(4, 1)
cmdInspectorInsert.Parameters.Add("Qualifications", SqlDbType.VarChar).Value = Inspectors(5, 1)
cmdInspectorInsert.Parameters.Add("Specialisms", SqlDbType.VarChar).Value = Inspectors(6, 1)
cmdInspectorInsert.Parameters.Add("AddressLine1", SqlDbType.VarChar).Value = Inspectors(7, 1)
cmdInspectorInsert.Parameters.Add("AddressLine2", SqlDbType.VarChar).Value = Inspectors(8, 1)
cmdInspectorInsert.Parameters.Add("AddressLine3", SqlDbType.VarChar).Value = Inspectors(9, 1)
cmdInspectorInsert.Parameters.Add("PostTown", SqlDbType.VarChar).Value = Inspectors(10, 1)
cmdInspectorInsert.Parameters.Add("postcode", SqlDbType.VarChar).Value = Inspectors(11, 1)
cmdInspectorInsert.Parameters.Add("Website", SqlDbType.VarChar).Value = Inspectors(12, 1)
cmdInspectorInsert.Parameters.Add("Email", SqlDbType.VarChar).Value = Inspectors(13, 1)
cmdInspectorInsert.Parameters.Add("Telephone", SqlDbType.VarChar).Value = Inspectors(14, 1)
cmdInspectorInsert.Parameters.Add("Fax", SqlDbType.VarChar).Value = Inspectors(15, 1)
cmdInspectorInsert.ExecuteNonQuery()

cmdInspectorInsert = Nothing
connNew.Close()
connNew = Nothing
End If
Do
Loop Until Not wbBrowser.Busy

wbBrowser.Quit()
wbBrowser = Nothing
End If
End Sub


Public Sub CloseAll()
'Dim wbBrowser As New SHDocVw.InternetExplorer
'Dim winds As New SHDocVw.ShellWindows()
'For Each ie As SHDocVw.InternetExplorer In winds
' If ie.MenuBar = False Then
' ie.Quit()
' End If
'Next
End Sub


End Class

No comments: