Всё, что необходимо для успешного выполнения приложения:
Элементы: форма(Form1), кнопка(Button1), рисунок(PictureBox1), браузер(WebBrowser1)
Project/Properties/References: помимо имеющихся нужно добавить: Microsoft.mshtml.dll и Interop.SHDocVw.dll
- Код: Выделить всё
Imports System
Imports System.IO
Imports System.Windows
Imports System.Windows.Forms
Imports System.Drawing.Graphics
Imports System.Runtime.InteropServices
Imports SHDocVw
Imports mshtml
Public Class Form1
Private Class GDI32
Public Const SRCCOPY As Integer = &HCC0020
' BitBlt dwRop parameter
<DllImport("gdi32.dll")> _
Public Shared Function BitBlt(ByVal hObject As IntPtr, ByVal nXDest As Integer, ByVal nYDest As Integer, ByVal nWidth As Integer, ByVal nHeight As Integer, ByVal hObjectSource As IntPtr, _
ByVal nXSrc As Integer, ByVal nYSrc As Integer, ByVal dwRop As Integer) As Boolean
End Function
<DllImport("gdi32.dll")> _
Public Shared Function CreateCompatibleBitmap(ByVal hDC As IntPtr, ByVal nWidth As Integer, ByVal nHeight As Integer) As IntPtr
End Function
<DllImport("gdi32.dll")> _
Public Shared Function CreateCompatibleDC(ByVal hDC As IntPtr) As IntPtr
End Function
<DllImport("gdi32.dll")> _
Public Shared Function DeleteDC(ByVal hDC As IntPtr) As Boolean
End Function
<DllImport("gdi32.dll")> _
Public Shared Function DeleteObject(ByVal hObject As IntPtr) As Boolean
End Function
<DllImport("gdi32.dll")> _
Public Shared Function SelectObject(ByVal hDC As IntPtr, ByVal hObject As IntPtr) As IntPtr
End Function
End Class
Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
PictureBox1.Visible = False
Button1.Text = "Загрузить полностью сайт и сохранить все рисунки на диск, не загружая их заново"
End Sub
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
' загружаем сайт (полностью)
WebBrowser1.Navigate("http://www.techdays.ru/Registration.aspx")
While Not WebBrowser1.ReadyState = WebBrowserReadyState.Complete
Me.Text = "Загружаем сайт.. " & WebBrowser1.ReadyState.ToString
Application.DoEvents()
End While
MsgBox("Страница загружена. Сейчас будем сохранять все отображённые рисунки...", MsgBoxStyle.Information, "")
Dim allPictures As System.Windows.Forms.HtmlElementCollection = WebBrowser1.Document.GetElementsByTagName("img")
Dim oneImage As System.Windows.Forms.HtmlElement
Dim i As Integer = 0
For Each oneImage In allPictures
i += 1
Me.Text = "Обрабатываем рисунок " & i & " из " & allPictures.Count
ImageToFile(oneImage.DomElement, CStr(i))
pause(100000)
Next
PictureBox1.Visible = False
MsgBox("Всего сохранено: " & i & " рисунков", MsgBoxStyle.Information, "")
Me.Text = "Всего обработано рисунков: " & allPictures.Count
End Sub
Sub ImageToFile(ByVal element As mshtml.IHTMLElement, ByVal nameIndex As String)
Dim i As Integer
Dim render As mshtml.IHTMLElementRender = element
Dim rendElem As ScreenGrab.IHTMLElementRender = element
Dim graphics As Graphics
Dim hdcDestination, hdcMemory, bitmap, hOld As IntPtr
Dim ImageFileName As String
PictureBox1.Visible = True
Dim tempPicBox As New PictureBox
tempPicBox = PictureBox1
tempPicBox.Show()
For i = 1 To 2 'одного раза мало - не всегда сразу дорисовываются картинки с меньшего размера в больший
tempPicBox.Width = element.clientwidth
tempPicBox.Height = element.clientheight
If Not (render Is Nothing) Then
graphics = tempPicBox.CreateGraphics
graphics.Clear(Color.White) ' стираем в один цвет - прозрачные картинки накладываются друг на друга
Try
hdcDestination = graphics.GetHdc
rendElem.DrawToDC(hdcDestination)
hdcMemory = GDI32.CreateCompatibleDC(hdcDestination)
bitmap = GDI32.CreateCompatibleBitmap(hdcDestination, element.clientwidth, element.clientheight)
If Not (bitmap = IntPtr.Zero) Then
hOld = CType(GDI32.SelectObject(hdcMemory, bitmap), IntPtr)
GDI32.BitBlt(hdcMemory, 0, 0, element.clientwidth, element.clientheight, hdcDestination, 0, 0, CType(GDI32.SRCCOPY, Integer))
GDI32.SelectObject(hdcMemory, hOld)
GDI32.DeleteDC(hdcMemory)
graphics.ReleaseHdc(hdcDestination)
tempPicBox.Image = Image.FromHbitmap(bitmap)
End If
Finally
CType(graphics, IDisposable).Dispose()
End Try
End If
ImageFileName = element.nameProp
Next
ImageFileName = "Image" & nameIndex & ".emf"
Dim host As String = WebBrowser1.Document.Url.Host
Dim path As String = "c:\temp\savedImages\" & host & "\"
If Not Directory.Exists(path) Then Directory.CreateDirectory(path)
tempPicBox.Image.Save(path & ImageFileName, System.Drawing.Imaging.ImageFormat.Emf)
End Sub
Sub pause(ByVal count As Integer)
For i = 0 To count
Application.DoEvents()
Next
End Sub
End Class
Namespace ScreenGrab
<Guid("3050f669-98b5-11cf-bb82-00aa00bdce0b"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown), ComVisible(True), ComImport()> _
Interface IHTMLElementRender
Sub DrawToDC(<[In]()> ByVal hDC As IntPtr)
Sub SetDocumentPrinter(<[In](), MarshalAs(UnmanagedType.BStr)> ByVal bstrPrinterName As String, <[In]()> ByVal hDC As IntPtr)
End Interface
Public Class IEElementCapture
Private webBrowser As IWebBrowser2 = Nothing
Public Sub New(ByVal webBrowser As IWebBrowser2)
Me.webBrowser = webBrowser
End Sub
Public Function Capture(ByRef g As Graphics) As Boolean
If webBrowser Is Nothing Then
Return False
End If
Dim htmlDocument As IHTMLDocument2 = DirectCast(webBrowser.Document, IHTMLDocument2)
If htmlDocument IsNot Nothing Then
Dim bodyElement As IHTMLElement = DirectCast(htmlDocument.body, IHTMLElement)
If bodyElement IsNot Nothing Then
Dim render As IHTMLElementRender = DirectCast(bodyElement, IHTMLElementRender)
If render IsNot Nothing Then
Dim memDC As IntPtr = g.GetHdc()
render.DrawToDC(memDC)
Return True
End If
End If
End If
Return False
End Function
End Class
End Namespace
Проверив всё сохранённое можно убедиться, что никаких дополнительных загрузок из интернета не происходит (например сохранённый рисунок защитного кода captcha абсолютно идентичен имеющемуся в браузере). Также рисунки не копируются из кэша. Всё снимается из окна браузера.
Блок "примитивной" паузы сделан для возможности визуального слежения за процессом переноса рисунков из браузера в PictureBox1.
Всё работает прекрасно с элементом WebBrowser. Однако он не поддерживает многопоточность и поэтому я пытаюсь переключиться на элемент, который поддерживает многопоточность - это myBrowser As New SHDocVw.InternetExplorer
Возлагаю большие надежды на Nord777, который никогда не оставлял мои вопросы без внимания и всегда профессионально на них отвечал.
Вот переделанный выше код под новый браузер, но не так всё гладко. Что нужно поменять, чтобы заработало?
- Код: Выделить всё
Imports System
Imports System.IO
Imports System.Windows
Imports System.Windows.Forms
Imports System.Drawing.Graphics
Imports System.Runtime.InteropServices
Imports SHDocVw
Imports mshtml
Public Class Form1
Private Class GDI32
Public Const SRCCOPY As Integer = &HCC0020
' BitBlt dwRop parameter
<DllImport("gdi32.dll")> _
Public Shared Function BitBlt(ByVal hObject As IntPtr, ByVal nXDest As Integer, ByVal nYDest As Integer, ByVal nWidth As Integer, ByVal nHeight As Integer, ByVal hObjectSource As IntPtr, _
ByVal nXSrc As Integer, ByVal nYSrc As Integer, ByVal dwRop As Integer) As Boolean
End Function
<DllImport("gdi32.dll")> _
Public Shared Function CreateCompatibleBitmap(ByVal hDC As IntPtr, ByVal nWidth As Integer, ByVal nHeight As Integer) As IntPtr
End Function
<DllImport("gdi32.dll")> _
Public Shared Function CreateCompatibleDC(ByVal hDC As IntPtr) As IntPtr
End Function
<DllImport("gdi32.dll")> _
Public Shared Function DeleteDC(ByVal hDC As IntPtr) As Boolean
End Function
<DllImport("gdi32.dll")> _
Public Shared Function DeleteObject(ByVal hObject As IntPtr) As Boolean
End Function
<DllImport("gdi32.dll")> _
Public Shared Function SelectObject(ByVal hDC As IntPtr, ByVal hObject As IntPtr) As IntPtr
End Function
End Class
Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
PictureBox1.Visible = False
Button1.Text = "Загрузить полностью сайт и сохранить все рисунки на диск, не загружая их заново"
End Sub
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
' загружаем сайт (полностью)
Dim myBrowser As New SHDocVw.InternetExplorer
myBrowser.Visible = True
myBrowser.Navigate("http://www.techdays.ru/Registration.aspx")
While Not myBrowser.ReadyState = WebBrowserReadyState.Complete
Me.Text = "Загружаем сайт.. " & myBrowser.ReadyState.ToString
Application.DoEvents()
End While
MsgBox("Страница загружена. Сейчас будем сохранять все отображённые рисунки...", MsgBoxStyle.Information, "")
Dim doc As mshtml.IHTMLDocument = myBrowser.Document
Dim allPictures As mshtml.IHTMLElementCollection = doc.getElementsByTagName("img")
Dim oneImage As mshtml.IHTMLElement
Dim i As Integer = 0
For Each oneImage In allPictures
i += 1
Me.Text = "Обрабатываем рисунок " & i & " из " & allPictures.length
ImageToFile(oneImage, CStr(i))
pause(100000)
Next
PictureBox1.Visible = False
MsgBox("Всего сохранено: " & i & " рисунков", MsgBoxStyle.Information, "")
Me.Text = "Всего обработано рисунков: " & allPictures.length
End Sub
Sub ImageToFile(ByVal element As mshtml.IHTMLElement, ByVal nameIndex As String)
Dim i As Integer
Dim render As mshtml.IHTMLElementRender = element
Dim rendElem As ScreenGrab.IHTMLElementRender = element
Dim graphics As Graphics
Dim hdcDestination, hdcMemory, bitmap, hOld As IntPtr
Dim ImageFileName As String
PictureBox1.Visible = True
Dim tempPicBox As New PictureBox
tempPicBox = PictureBox1
tempPicBox.Show()
For i = 1 To 2 'одного раза мало - не всегда сразу дорисовываются картинки с меньшего размера в больший
tempPicBox.Width = element.clientwidth
tempPicBox.Height = element.clientheight
If Not (render Is Nothing) Then
graphics = tempPicBox.CreateGraphics
graphics.Clear(Color.White)
Try
hdcDestination = graphics.GetHdc
rendElem.DrawToDC(hdcDestination)
hdcMemory = GDI32.CreateCompatibleDC(hdcDestination)
bitmap = GDI32.CreateCompatibleBitmap(hdcDestination, element.clientwidth, element.clientheight)
If Not (bitmap = IntPtr.Zero) Then
hOld = CType(GDI32.SelectObject(hdcMemory, bitmap), IntPtr)
GDI32.BitBlt(hdcMemory, 0, 0, element.clientwidth, element.clientheight, hdcDestination, 0, 0, CType(GDI32.SRCCOPY, Integer))
GDI32.SelectObject(hdcMemory, hOld)
GDI32.DeleteDC(hdcMemory)
graphics.ReleaseHdc(hdcDestination)
tempPicBox.Image = Image.FromHbitmap(bitmap)
End If
Finally
CType(graphics, IDisposable).Dispose()
End Try
End If
ImageFileName = element.nameProp
Next
ImageFileName = "Image" & nameIndex & ".emf"
Dim host As String = WebBrowser1.Document.Url.Host
Dim path As String = "c:\temp\savedImages\" & host & "\"
If Not Directory.Exists(path) Then Directory.CreateDirectory(path)
tempPicBox.Image.Save(path & ImageFileName, System.Drawing.Imaging.ImageFormat.Emf)
End Sub
Sub pause(ByVal count As Integer)
For i = 0 To count
Application.DoEvents()
Next
End Sub
End Class
Namespace ScreenGrab
<Guid("3050f669-98b5-11cf-bb82-00aa00bdce0b"), InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown), ComVisible(True), ComImport()> _
Interface IHTMLElementRender
Sub DrawToDC(<[In]()> ByVal hDC As IntPtr)
Sub SetDocumentPrinter(<[In](), MarshalAs(UnmanagedType.BStr)> ByVal bstrPrinterName As String, <[In]()> ByVal hDC As IntPtr)
End Interface
Public Class IEElementCapture
Private webBrowser As IWebBrowser2 = Nothing
Public Sub New(ByVal webBrowser As IWebBrowser2)
Me.webBrowser = webBrowser
End Sub
Public Function Capture(ByRef g As Graphics) As Boolean
If webBrowser Is Nothing Then
Return False
End If
Dim htmlDocument As IHTMLDocument2 = DirectCast(webBrowser.Document, IHTMLDocument2)
If htmlDocument IsNot Nothing Then
Dim bodyElement As IHTMLElement = DirectCast(htmlDocument.body, IHTMLElement)
If bodyElement IsNot Nothing Then
Dim render As IHTMLElementRender = DirectCast(bodyElement, IHTMLElementRender)
If render IsNot Nothing Then
Dim memDC As IntPtr = g.GetHdc()
render.DrawToDC(memDC)
Return True
End If
End If
End If
Return False
End Function
End Class
End Namespace
В следующем блоке выскакивает ошибка:
Sub ImageToFile(ByVal element As mshtml.IHTMLElement, ByVal nameIndex As String)
Dim i As Integer
Dim render As mshtml.IHTMLElementRender = element
Dim rendElem As ScreenGrab.IHTMLElementRender = element
Dim graphics As Graphics
...
Unable to cast COM object of type 'mshtml.HTMLImgClass' to interface type 'mshtml.IHTMLElementRender'. This operation failed because the QueryInterface call on the COM component for the interface with IID '{3050F669-98B5-11CF-BB82-00AA00BDCE0B}' failed due to the following error: Интерфейс не поддерживается (Exception from HRESULT: 0x80004002 (E_NOINTERFACE)).