This program parses a website. The program works well, but is too long. I want to simplify/speed it up.
How program works:
- First , the program finds needed hyperlink in Excel
- Then by the hyperlink , the program goes to the site , where it finds a certain table of elements. Then it takes out the "href" of each element , turns it into a hyperlink , and inserts it into Excel in the first table
- Then again by the hyperlink , the program goes to the site, where it finds a certain table of elements. Then it extracts the text of each element and inserts it into Excel in the second table
- Then it goes through the elements of the 1st and 2nd tables , so that in the 3rd table each element contains a "hyperlink +text"
Sub Softãèïåðññûëêè()
Application.DisplayAlerts = False
Call mainìàññèâû
Application.DisplayAlerts = True
End Sub
Sub mainìàññèâû()
Dim r As Range
Dim firstAddress As String
Dim iLoop As Long
Dim book1 As Workbook
Dim sheetNames(1 To 19) As String
Dim Ssilka As String
'!!! 1. First , the program finds needed hyperlink
sheetNames(1) = "Ëèñò1"
sheetNames(2) = "Ëèñò2"
sheetNames(3) = "Ëèñò3"
sheetNames(4) = "Ëèñò4"
sheetNames(5) = "Ëèñò5"
sheetNames(6) = "Ëèñò6"
sheetNames(7) = "Ëèñò7"
sheetNames(8) = "Ëèñò8"
sheetNames(9) = "Ëèñò9"
sheetNames(10) = "Ëèñò10"
sheetNames(11) = "Ëèñò11"
sheetNames(12) = "Ëèñò12"
sheetNames(13) = "Ëèñò13"
sheetNames(14) = "Ëèñò14"
sheetNames(15) = "Ëèñò15"
sheetNames(16) = "Ëèñò16"
sheetNames(17) = "Ëèñò17"
sheetNames(18) = "Ëèñò18"
sheetNames(19) = "Ëèñò19"
Set book1 = Workbooks.Open("E:\Super M\Ïðîåêò ñòàâêè\Ïîèñê ðåøåíèÿ\Óñîâ 7\Óñëîâèÿ äëÿ àíäåðäîãîâ6円.xlsm")
iLoop = -1
With book1.Worksheets("Ëèñò1").Range("R34:R99")
For Each r In .Rows
If r.Value = 1 Then
iLoop = iLoop + 1
Ssilka = r.Offset(, -13).Hyperlinks.Item(1).Address
.Parent.Parent.Worksheets(sheetNames(1)).Activate
.Parent.Parent.Save
extractTable Ssilka, book1, iLoop
End If
Next r
End With
book1.Save
book1.Close
Exit Sub
End Sub
Function extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
Dim oDom As Object, oTable As Object, oRow As Object
Dim iRows As Integer, iCols As Integer
Dim x As Integer, y As Integer
Dim data()
Dim oHttp As Object
Dim oRegEx As Object
Dim sResponse As String
Dim oRange As Range
Dim Perem1 As String
Dim Perem2 As String
'!!!2. Then by the hyperlink , the program goes to the site , where it finds a certain table of elements.Then it takes out the "href" of each element , turns it into a hyperlink , and inserts it into Excel in the 1-st table
' get page
Set oHttp = CreateObject("MSXML2.XMLHTTP")
oHttp.Open "GET", Ssilka, False
oHttp.Send
' cleanup response
sResponse = StrConv(oHttp.responseBody, vbUnicode)
Set oHttp = Nothing
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Set oRegEx = CreateObject("vbscript.regexp")
With oRegEx
.MultiLine = True
.Global = True
.IgnoreCase = False
.Pattern = "<(script|SCRIPT)[\w\W]+?</1円>"
sResponse = .Replace(sResponse, "")
End With
Set oRegEx = Nothing
' create Document from response
Set oDom = CreateObject("htmlFile")
oDom.Write sResponse
DoEvents
' table with results, indexes starts with zero
Set oTable = oDom.getelementsbytagname("table")(3)
DoEvents
iRows = oTable.Rows.Length
iCols = oTable.Rows(1).Cells.Length
' first row and first column contain no intresting data
ReDim data(1 To iRows - 1, 1 To iCols - 1)
' fill in data array
For x = 1 To iRows - 1
Set oRow = oTable.Rows(x)
For y = 1 To iCols - 1
If oRow.Cells(y).Children.Length > 0 Then
data(x, y) = oRow.Cells(y).getelementsbytagname("a")(0).getattribute("href")
End If
Next y
Next x
Set oRow = Nothing
Set oTable = Nothing
Set oDom = Nothing
' put data array on worksheet
Set oRange = book1.ActiveSheet.Cells(110, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
oRange.NumberFormat = "@"
oRange.Value = data
oRange.Replace What:="about:", Replacement:="http://allscores.ru/soccer/"
Set oRange = Nothing
'!!!! 3. Then again by the hyperlink , the program goes to the site , where it finds a certain table of elements. Then it extracts the text of each element and inserts it into Excel in the 2-nd table
' get page
Set oHttp = CreateObject("MSXML2.XMLHTTP")
oHttp.Open "GET", Ssilka, False
oHttp.Send
' cleanup response
sResponse = StrConv(oHttp.responseBody, vbUnicode)
Set oHttp = Nothing
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Set oRegEx = CreateObject("vbscript.regexp")
With oRegEx
.MultiLine = True
.Global = True
.IgnoreCase = False
.Pattern = "<(script|SCRIPT)[\w\W]+?</1円>"
sResponse = .Replace(sResponse, "")
End With
Set oRegEx = Nothing
' create Document from response
Set oDom = CreateObject("htmlFile")
oDom.Write sResponse
DoEvents
' table with results, indexes starts with zero
Set oTable = oDom.getelementsbytagname("table")(3)
DoEvents
iRows = oTable.Rows.Length
iCols = oTable.Rows(1).Cells.Length
' first row and first column contain no intresting data
ReDim data(1 To iRows - 1, 1 To iCols - 1)
' fill in data array
For x = 1 To iRows - 1
Set oRow = oTable.Rows(x)
For y = 1 To iCols - 1
If oRow.Cells(y).Children.Length > 0 Then
data(x, y) = oRow.Cells(y).innerText
End If
Next y
Next x
Set oRow = Nothing
Set oTable = Nothing
Set oDom = Nothing
' put data array on worksheet
Set oRange = book1.ActiveSheet.Cells(185, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
oRange.NumberFormat = "@"
oRange.Value = data
Set oRange = Nothing
'!!! 4. Then it goes through the elements of the 1-st and 2-nd tables , so that in the 3-rd table each element contains a "hyperlink +text"
For A = 0 To 4
For B = 0 To 65
Perem1 = book1.ActiveSheet.Cells(110 + B, (26 + (iLoop * 21)) + A).Value
Perem2 = book1.ActiveSheet.Cells(185 + B, (26 + (iLoop * 21)) + A).Value
book1.ActiveSheet.Hyperlinks.Add Anchor:=Cells(34 + B, (26 + (iLoop * 21)) + A), Address:=Perem1, TextToDisplay:=Perem2
Next
Next
End Function
1 Answer 1
Having consistent indentation will help readability. Declaring your variables closer to where they are used also helps clear things up, firstAddress
isn't used anywhere.
Using Alt+D+L (Debug>Compile) brings attention to the variables A
and B
not being declared anywhere. Alt+T+O (Tools>Options) and making sure Require Variable Declaration
is enabled will have future-you thanking current-you. Not having variable explicitly declared makes debugging needlessly harder.
Variables that are declared as Integer
are converted to a Long
, better to initially declare them as Long
.
extractTable
is a function but nothing is being returned. Functions have a value returned Public Function Foo(ByVal bar as long) as <Type>
. The function is assigned to what it is returning set Foo = wb1.Range("A1:B2")
or Foo = 9999
depending on whether it's an object or not. extractTable
should be a Sub
Using the Value
property on a Range
object can cause rounding issues (A little reading: https://fastexcel.wordpress.com/2011/11/30/text-vs-value-vs-value2-slow-text-and-how-to-avoid-it/).
Magic numbers. In your code you have the values of 110
, 185
, 26
, 21
, 34
, -13
that are doing things which aren't arbitrary. They are there for a purpose, but what purpose? Numbers don't mean a lot so use a Const
constant to describe what they are doing. Again naming is hard and you'll have to figure out how to best name them. 110
may become Private Const getAttributePopulationRow As Long = 110
.
The above covers the easiest stuff.
Instead of having 1 big Method that does several things it's preferable to have it do 1 thing. This reduces cognitive load and how much you have to remember at an given time. Imagine the below code. Seeing that is much easier to understand what's going on than remembering what 300+ lines of code is supposed to be doing.
Sub Foo()
CreateATable
ManipulateATable
SaveTableInNewWorkbook "C:\SpecificPath\", "Filename.xlsm"
End Sub
Comments '!!!2,3,4
that's describing what you're doing indicates to me that's a new method trying to escape. Code should be self documenting, it should tell you what it's doing by reading it. Comments should describe why it's being done that way. After doing this kind of refactoring extractTable
becomes.
Private Sub extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
TablePart2 Ssilka, iLoop, book1
TablePart3 Ssilka, iLoop, book1
TablePart4 iLoop, book1
End Sub
Once that's done and you compare TablePart2
and TablePart3
you can see they are virtually identical.
When writing code you want to follow DRY (Don't Repeat Yourself), no copy/pasting. Copy/pasting leads to less maintainable code and makes it harder to ensure you've updated errors because you have to go to every place and update accordingly. By making the solution generic and then applying the generic solution to your specific concern you make the code much more reusable.
Looking at TablePart2
what is it doing? Doing some HTTP request, cleaning, getting a table and populating it on a range. Naming is hard and you have to figure it out. I'm not entirely sure what to name it, for now I'll use PopulateTable
.
Where there are differences in this process you'll see they vary by the population of data(x, y)
and where you set oRange
starting from (and the oRange.Replace()
call). Supplying an argument to the parameter will allow you to take care of this making the code more generic.
Public SomeMethod(ByVal suppliedParameter as string)
The above describes the method signature. What it accepts as a parameter. Below is how it's used, and the argument that's supplied
SomeMethod "suppliedArgument"
What comes from an initial refactoring Private Sub TablePart2(ByVal Ssilka As String, ByVal iLoop As Long, ByVal book1 As Workbook)
and TablePart3
become Private Sub PopulateTable(ByVal populateFrom as DataFrom, ByVal Ssilka As String, ByVal iLoop As Long, ByVal book1 As Workbook)
.
You'll see that the first parameter populateFrom
uses a parameter that's defined by an Enum
(enumeration).
Private Enum DataFrom
InnerText
GetAttribute
End Enum
This Enum will allow you to explicitly specify what want to use.
Now inside of PopulateTable
you check on populateFrom
and make decisions based on that. What follows is the refactoring of the code I was able to do.
Cleaned up code.
Option Explicit
Private Enum DataFrom
InnerText
GetAttribute
End Enum
Private Const getAttributePopulationRow As Long = 110
Private Const innerTextPopulationRow As Long = 185
Private Const columnOffsetMultiple As Long = 21
Private Const columnOffset As Long = 26
Sub Softãèïåðññûëêè()
Application.DisplayAlerts = False
Call mainìàññèâû
Application.DisplayAlerts = True
End Sub
Private Sub mainìàññèâû()
Dim sheetNames(1 To 19) As String
Dim i As Long
For i = LBound(sheetNames) To UBound(sheetNames)
sheetNames(i) = "Ëèñò" & i
Next
Dim book1 As Workbook
Set book1 = Workbooks.Open("E:\Super M\Ïðîåêò ñòàâêè\Ïîèñê ðåøåíèÿ\Óñîâ 7\Óñëîâèÿ äëÿ àíäåðäîãîâ6円.xlsm")
Dim iLoop As Long
iLoop = -1
With book1.Worksheets("Ëèñò1").Range("R34:R99")
Dim r As Range
For Each r In .Rows
If r.Value = 1 Then
iLoop = iLoop + 1
Dim Ssilka As String
Ssilka = r.Offset(, -13).Hyperlinks.Item(1).Address
.Parent.Parent.Worksheets(sheetNames(1)).Activate
.Parent.Parent.Save
extractTable Ssilka, book1, iLoop
End If
Next r
End With
book1.Save
book1.Close
Exit Sub
End Sub
Private Sub extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
PopulateTable GetAttribute, Ssilka, iLoop, book1
PopulateTable InnerText, Ssilka, iLoop, book1
TablePart4 iLoop, book1
End Sub
Private Sub PopulateTable(ByVal populateFrom As DataFrom, ByVal Ssilka As String, ByVal iLoop As Long, ByVal book1 As Workbook)
' get page
Dim oHttp As Object
Set oHttp = CreateObject("MSXML2.XMLHTTP")
oHttp.Open "GET", Ssilka, False
oHttp.Send
' cleanup response
Dim sResponse As String
sResponse = StrConv(oHttp.responseBody, vbUnicode)
Set oHttp = Nothing
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With CreateObject("vbscript.regexp")
.MultiLine = True
.Global = True
.IgnoreCase = False
.Pattern = "<(script|SCRIPT)[\w\W]+?</1円>"
sResponse = .Replace(sResponse, "")
End With
' create Document from response
Dim oDom As Object
Set oDom = CreateObject("htmlFile")
oDom.Write sResponse
DoEvents
' table with results, indexes starts with zero
Dim oTable As Object
Set oTable = oDom.getelementsbytagname("table")(3)
DoEvents
Dim iRows As Long
iRows = oTable.Rows.Length
Dim iCols As Long
iCols = oTable.Rows(1).Cells.Length
' first row and first column contain no intresting data
Dim data()
ReDim data(1 To iRows - 1, 1 To iCols - 1)
' fill in data array
Dim x As Long
For x = 1 To iRows - 1
Dim oRow As Object
Set oRow = oTable.Rows(x)
Dim y As Long
For y = 1 To iCols - 1
If oRow.Cells(y).Children.Length > 0 Then
If populateFrom = GetAttribute Then
data(x, y) = oRow.Cells(y).getelementsbytagname("a")(0).GetAttribute("href")
ElseIf populateFrom = InnerText Then
data(x, y) = oRow.Cells(y).InnerText
End If
End If
Next y
Next x
Set oRow = Nothing
Set oTable = Nothing
Set oDom = Nothing
' put data array on worksheet
Dim startRow As Long
If populateFrom = GetAttribute Then
startRow = getAttributePopulationRow
ElseIf populateFrom = InnerText Then
startRow = innerTextPopulationRow
End If
With book1.ActiveSheet.Cells(startRow, columnOffset + (iLoop * columnOffsetMultiple)).Resize(iRows - 1, iCols - 1)
.NumberFormat = "@"
.Value = data
If populateFrom = GetAttribute Then
.Replace What:="about:", Replacement:="http://allscores.ru/soccer/"
End If
End With
End Sub
Private Sub TablePart4(ByVal iLoop As Long, ByVal book1 As Workbook)
Const rowOffset As Long = 34
Dim A As Long
For A = 0 To 4
Dim B As Long
For B = 0 To 65
Dim Perem1 As String
Perem1 = book1.ActiveSheet.Cells(getAttributePopulationRow + B, (columnOffset + (iLoop * columnOffsetMultiple)) + A).Value2
Dim Perem2 As String
Perem2 = book1.ActiveSheet.Cells(innerTextPopulationRow + B, (columnOffset + (iLoop * columnOffsetMultiple)) + A).Value2
book1.ActiveSheet.Hyperlinks.Add Anchor:=Cells(rowOffset + B, (columnOffset + (iLoop * columnOffsetMultiple)) + A), Address:=Perem1, TextToDisplay:=Perem2
Next
Next
End Sub
This simplifies the code. Now it's your turn to investigate how to speed it up.
-
\$\begingroup\$ Thanks for your work , but your simplifications only wins maximum 5 % of time . I need to win >80 % . And it is real , because >80% of time the program spend on getting data from website , pasting it in Excel ,replace every element , then doing same process with another data , then going throug two tables to create third table .But these processes are not simplified in any way .But this is the main goal . \$\endgroup\$maxim465– maxim4652017年11月14日 06:13:36 +00:00Commented Nov 14, 2017 at 6:13
-
\$\begingroup\$ There might be ways of improving the execution but without knowing the workbook contents I don't think there's much more that can be done. \$\endgroup\$IvenBach– IvenBach2017年11月14日 17:17:34 +00:00Commented Nov 14, 2017 at 17:17
-
\$\begingroup\$ how about recordset ? \$\endgroup\$maxim465– maxim4652017年11月15日 05:53:59 +00:00Commented Nov 15, 2017 at 5:53