본문 바로가기

자료

[C# / VB.NET] HtmlAgilityPack 라이브러리를 이용한 크롤링 시 라이브러리 예제

728x90

[C# / VB.NET] HtmlAgilityPack 라이브러리를 이용한 크롤링 시 라이브러리 예제

 

Imports HtmlAgilityPack

        ' Loads the HTML held in T into an HtmlAgilityPack document and walks every <li>
        ' directly under the element whose id is "productList".
        ' T is declared outside this snippet: it carries the page HTML on entry and is then
        ' reused as a scratch variable. Each assignment to T marks one extracted value that a
        ' real caller would store before it is overwritten by the next one.
        Dim doc As New HtmlDocument, a As HtmlNode, img As HtmlNode, hNode As HtmlNode
        doc.LoadHtml(T)

        ' Run the list query once. SelectNodes yields Nothing (not an empty collection)
        ' when no node matches, hence the explicit check.
        Dim productItems As HtmlNodeCollection = doc.DocumentNode.SelectNodes("//*[@id=""productList""]/li")
        If productItems IsNot Nothing Then
            ' This XPath is absolute (starts with //), so it resolves against the whole
            ' document regardless of the current item; look it up once instead of per item.
            Dim headingNode As HtmlNode = doc.DocumentNode.SelectSingleNode("//*[@id=""searchOptionForm""]/div/div/div[1]/div/div[1]/h3")
            Dim headingText As String = Nothing
            If headingNode IsNot Nothing Then
                ' Flatten tabs/newlines to spaces and keep only the text before the first "(".
                headingText = headingNode.InnerText.Replace(vbTab, Space(1)).Replace(vbLf, Space(1)).Split("("c).First.Trim
            End If

            For Each li As HtmlNode In productItems
                a = li.SelectSingleNode("a")
                ' Items without a direct <a> child carry none of the fields read below.
                If a Is Nothing Then Continue For

                ' Inner HTML of the second <div> under dl/dd (presumably the product name - confirm against the page).
                Dim div2Node As HtmlNode = a.SelectSingleNode("dl/dd/div[2]")
                If div2Node IsNot Nothing Then
                    T = div2Node.InnerHtml.Replace(vbLf, Space(1)).Trim
                End If

                ' Attribute values; a missing attribute yields an empty string instead of throwing.
                T = a.GetAttributeValue("data-item-id", String.Empty)
                ' Comparison result (Boolean) assigned to T: the first "=" assigns, the second compares.
                T = (a.GetAttributeValue("data-is-rocket", String.Empty) = "true")
                ' Attribute text may still be HTML-escaped, so turn "&amp;" back into "&" for a usable URL.
                T = "https://www.coupang.com" & a.GetAttributeValue("href", String.Empty).Replace("&amp;", "&")

                img = a.SelectSingleNode("dl/dt/img")
                If img IsNot Nothing Then
                    T = "https:" & img.GetAttributeValue("src", String.Empty)
                    ' When data-img-src is present it replaces the value taken from src.
                    If img.Attributes("data-img-src") IsNot Nothing Then
                        T = "https:" & img.Attributes("data-img-src").Value
                    Else
                        ' NOTE(review): kept from the original; looks like a leftover placeholder
                        ' (it only pumps pending UI messages) - confirm whether it is needed.
                        Application.DoEvents()
                    End If
                End If

                If headingText IsNot Nothing Then
                    T = headingText
                End If

                ' Text of the <strong> inside <em> (presumably the price - confirm against the page).
                Dim strongNode As HtmlNode = a.SelectSingleNode("dl/dd/div[3]/div[1]/div[1]/em/strong")
                If strongNode IsNot Nothing Then
                    T = strongNode.InnerText
                End If

                ' First <span> below the given span whose class contains "discount-percentage";
                ' GetClassNode returns Nothing when the span or a match is missing.
                hNode = GetClassNode(a.SelectSingleNode("dl/dd/div[3]/div[1]/div[1]/span[1]"), "span", "discount-percentage")
            Next
        End If

    ''' <summary>
    ''' Finds the first descendant of <paramref name="Node"/> with the given tag name whose
    ''' "class" attribute value contains <paramref name="className"/>.
    ''' </summary>
    ''' <param name="Node">Node whose descendants are searched; may be Nothing.</param>
    ''' <param name="tagName">Tag name passed to <c>Descendants</c> (e.g. "span").</param>
    ''' <param name="className">Text that must occur inside the class attribute value.</param>
    ''' <returns>
    ''' The first matching descendant in <c>Descendants</c> order, or Nothing when any argument
    ''' is Nothing or no descendant matches.
    ''' </returns>
    ''' <remarks>
    ''' The match is a substring test on the whole attribute value, not a whole-token test, so
    ''' "price" also matches class="price-old".
    ''' </remarks>
    Private Function GetClassNode(ByVal Node As HtmlNode, ByVal tagName As String, ByVal className As String) As HtmlNode
        ' Explicit guards replace the former catch-all handler: the visible call site passes a
        ' SelectSingleNode result straight in, and that result is Nothing when the XPath matches nothing.
        If Node Is Nothing OrElse tagName Is Nothing OrElse className Is Nothing Then Return Nothing

        ' FirstOrDefault stops at the first hit and yields Nothing when there is none,
        ' so no intermediate list or count check is needed.
        Return Node.Descendants(tagName).FirstOrDefault(Function(k) k.Attributes.Contains("class") AndAlso k.Attributes("class").Value IsNot Nothing AndAlso k.Attributes("class").Value.Contains(className))
    End Function

 

HtmlAgilityPack.dll
0.16MB

728x90