1
\$\begingroup\$

This program parses a website. The program works well, but is too long. I want to simplify/speed it up.

How program works:

  1. First , the program finds needed hyperlink in Excel
  2. Then by the hyperlink , the program goes to the site , where it finds a certain table of elements. Then it takes out the "href" of each element , turns it into a hyperlink , and inserts it into Excel in the first table
  3. Then again by the hyperlink , the program goes to the site, where it finds a certain table of elements. Then it extracts the text of each element and inserts it into Excel in the second table
  4. Then it goes through the elements of the 1st and 2nd tables , so that in the 3rd table each element contains a "hyperlink +text"
Sub Softãèïåðññûëêè()
Application.DisplayAlerts = False
Call mainìàññèâû
Application.DisplayAlerts = True
End Sub
Sub mainìàññèâû()
Dim r As Range
Dim firstAddress As String
Dim iLoop As Long
Dim book1 As Workbook
Dim sheetNames(1 To 19) As String
Dim Ssilka As String
 '!!! 1. First , the program finds needed hyperlink
 sheetNames(1) = "Ëèñò1"
 sheetNames(2) = "Ëèñò2"
 sheetNames(3) = "Ëèñò3"
 sheetNames(4) = "Ëèñò4"
 sheetNames(5) = "Ëèñò5"
 sheetNames(6) = "Ëèñò6"
 sheetNames(7) = "Ëèñò7"
 sheetNames(8) = "Ëèñò8"
 sheetNames(9) = "Ëèñò9"
 sheetNames(10) = "Ëèñò10"
 sheetNames(11) = "Ëèñò11"
 sheetNames(12) = "Ëèñò12"
 sheetNames(13) = "Ëèñò13"
 sheetNames(14) = "Ëèñò14"
 sheetNames(15) = "Ëèñò15"
 sheetNames(16) = "Ëèñò16"
 sheetNames(17) = "Ëèñò17"
 sheetNames(18) = "Ëèñò18"
 sheetNames(19) = "Ëèñò19"
 Set book1 = Workbooks.Open("E:\Super M\Ïðîåêò ñòàâêè\Ïîèñê ðåøåíèÿ\Óñîâ 7\Óñëîâèÿ äëÿ àíäåðäîãîâ6円.xlsm")
 iLoop = -1
 With book1.Worksheets("Ëèñò1").Range("R34:R99")
 For Each r In .Rows
 If r.Value = 1 Then
 iLoop = iLoop + 1
 Ssilka = r.Offset(, -13).Hyperlinks.Item(1).Address
 .Parent.Parent.Worksheets(sheetNames(1)).Activate
 .Parent.Parent.Save
 extractTable Ssilka, book1, iLoop
 End If
 Next r
End With
book1.Save
book1.Close
Exit Sub
End Sub
Function extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
Dim oDom As Object, oTable As Object, oRow As Object
Dim iRows As Integer, iCols As Integer
Dim x As Integer, y As Integer
Dim data()
Dim oHttp As Object
Dim oRegEx As Object
Dim sResponse As String
Dim oRange As Range
Dim Perem1 As String
Dim Perem2 As String
 '!!!2. Then by the hyperlink , the program goes to the site , where it finds a certain table of elements.Then it takes out the "href" of each element , turns it into a hyperlink , and inserts it into Excel in the 1-st table
' get page
Set oHttp = CreateObject("MSXML2.XMLHTTP")
oHttp.Open "GET", Ssilka, False
oHttp.Send
' cleanup response
sResponse = StrConv(oHttp.responseBody, vbUnicode)
Set oHttp = Nothing
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Set oRegEx = CreateObject("vbscript.regexp")
With oRegEx
.MultiLine = True
.Global = True
.IgnoreCase = False
.Pattern = "<(script|SCRIPT)[\w\W]+?</1円>"
sResponse = .Replace(sResponse, "")
End With
Set oRegEx = Nothing
 ' create Document from response
 Set oDom = CreateObject("htmlFile")
 oDom.Write sResponse
 DoEvents
 ' table with results, indexes starts with zero
 Set oTable = oDom.getelementsbytagname("table")(3)
DoEvents
iRows = oTable.Rows.Length
iCols = oTable.Rows(1).Cells.Length
' first row and first column contain no intresting data
ReDim data(1 To iRows - 1, 1 To iCols - 1)
' fill in data array
For x = 1 To iRows - 1
Set oRow = oTable.Rows(x)
For y = 1 To iCols - 1
 If oRow.Cells(y).Children.Length > 0 Then
 data(x, y) = oRow.Cells(y).getelementsbytagname("a")(0).getattribute("href")
 End If
Next y
Next x
Set oRow = Nothing
Set oTable = Nothing
Set oDom = Nothing
 ' put data array on worksheet
 Set oRange = book1.ActiveSheet.Cells(110, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
oRange.NumberFormat = "@"
oRange.Value = data
oRange.Replace What:="about:", Replacement:="http://allscores.ru/soccer/"
Set oRange = Nothing
 '!!!! 3. Then again by the hyperlink , the program goes to the site , where it finds a certain table of elements. Then it extracts the text of each element and inserts it into Excel in the 2-nd table
 ' get page
 Set oHttp = CreateObject("MSXML2.XMLHTTP")
 oHttp.Open "GET", Ssilka, False
 oHttp.Send
 ' cleanup response
 sResponse = StrConv(oHttp.responseBody, vbUnicode)
 Set oHttp = Nothing
 sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
 Set oRegEx = CreateObject("vbscript.regexp")
With oRegEx
.MultiLine = True
.Global = True
.IgnoreCase = False
.Pattern = "<(script|SCRIPT)[\w\W]+?</1円>"
sResponse = .Replace(sResponse, "")
End With
Set oRegEx = Nothing
 ' create Document from response
Set oDom = CreateObject("htmlFile")
oDom.Write sResponse
DoEvents
' table with results, indexes starts with zero
Set oTable = oDom.getelementsbytagname("table")(3)
DoEvents
iRows = oTable.Rows.Length
iCols = oTable.Rows(1).Cells.Length
' first row and first column contain no intresting data
ReDim data(1 To iRows - 1, 1 To iCols - 1)
' fill in data array
For x = 1 To iRows - 1
Set oRow = oTable.Rows(x)
 For y = 1 To iCols - 1
 If oRow.Cells(y).Children.Length > 0 Then
 data(x, y) = oRow.Cells(y).innerText
 End If
Next y
Next x
Set oRow = Nothing
Set oTable = Nothing
Set oDom = Nothing
' put data array on worksheet
Set oRange = book1.ActiveSheet.Cells(185, 26 + (iLoop * 21)).Resize(iRows - 1, iCols - 1)
oRange.NumberFormat = "@"
oRange.Value = data
Set oRange = Nothing
 '!!! 4. Then it goes through the elements of the 1-st and 2-nd tables , so that in the 3-rd table each element contains a "hyperlink +text"
 For A = 0 To 4
 For B = 0 To 65
 Perem1 = book1.ActiveSheet.Cells(110 + B, (26 + (iLoop * 21)) + A).Value
 Perem2 = book1.ActiveSheet.Cells(185 + B, (26 + (iLoop * 21)) + A).Value
 book1.ActiveSheet.Hyperlinks.Add Anchor:=Cells(34 + B, (26 + (iLoop * 21)) + A), Address:=Perem1, TextToDisplay:=Perem2
 Next
 Next
 End Function
200_success
145k22 gold badges190 silver badges478 bronze badges
asked Nov 13, 2017 at 7:28
\$\endgroup\$

1 Answer 1

1
\$\begingroup\$

Having consistent indentation will help readability. Declaring your variables closer to where they are used also helps clear things up, firstAddress isn't used anywhere.


Using Alt+D+L (Debug>Compile) brings attention to the variables A and B not being declared anywhere. Alt+T+O (Tools>Options) and making sure Require Variable Declaration is enabled will have future-you thanking current-you. Not having variable explicitly declared makes debugging needlessly harder.


Variables that are declared as Integer are converted to a Long, better to initially declare them as Long.


extractTable is a function but nothing is being returned. Functions have a value returned Public Function Foo(ByVal bar as long) as <Type>. The function is assigned to what it is returning set Foo = wb1.Range("A1:B2") or Foo = 9999 depending on whether it's an object or not. extractTable should be a Sub


Using the Value property on a Range object can cause rounding issues (A little reading: https://fastexcel.wordpress.com/2011/11/30/text-vs-value-vs-value2-slow-text-and-how-to-avoid-it/).


Magic numbers. In your code you have the values of 110, 185, 26, 21, 34, -13 that are doing things which aren't arbitrary. They are there for a purpose, but what purpose? Numbers don't mean a lot so use a Const constant to describe what they are doing. Again naming is hard and you'll have to figure out how to best name them. 110 may become Private Const getAttributePopulationRow As Long = 110.


The above covers the easiest stuff.

Instead of having 1 big Method that does several things it's preferable to have it do 1 thing. This reduces cognitive load and how much you have to remember at an given time. Imagine the below code. Seeing that is much easier to understand what's going on than remembering what 300+ lines of code is supposed to be doing.

Sub Foo()
 CreateATable
 ManipulateATable
 SaveTableInNewWorkbook "C:\SpecificPath\", "Filename.xlsm"
End Sub

Comments '!!!2,3,4 that's describing what you're doing indicates to me that's a new method trying to escape. Code should be self documenting, it should tell you what it's doing by reading it. Comments should describe why it's being done that way. After doing this kind of refactoring extractTable becomes.

Private Sub extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
 TablePart2 Ssilka, iLoop, book1
 TablePart3 Ssilka, iLoop, book1
 TablePart4 iLoop, book1
End Sub

Once that's done and you compare TablePart2 and TablePart3 you can see they are virtually identical.

When writing code you want to follow DRY (Don't Repeat Yourself), no copy/pasting. Copy/pasting leads to less maintainable code and makes it harder to ensure you've updated errors because you have to go to every place and update accordingly. By making the solution generic and then applying the generic solution to your specific concern you make the code much more reusable.


Looking at TablePart2 what is it doing? Doing some HTTP request, cleaning, getting a table and populating it on a range. Naming is hard and you have to figure it out. I'm not entirely sure what to name it, for now I'll use PopulateTable.

Where there are differences in this process you'll see they vary by the population of data(x, y) and where you set oRange starting from (and the oRange.Replace() call). Supplying an argument to the parameter will allow you to take care of this making the code more generic.

Public SomeMethod(ByVal suppliedParameter as string)

The above describes the method signature. What it accepts as a parameter. Below is how it's used, and the argument that's supplied

SomeMethod "suppliedArgument"

What comes from an initial refactoring Private Sub TablePart2(ByVal Ssilka As String, ByVal iLoop As Long, ByVal book1 As Workbook) and TablePart3 become Private Sub PopulateTable(ByVal populateFrom as DataFrom, ByVal Ssilka As String, ByVal iLoop As Long, ByVal book1 As Workbook).

You'll see that the first parameter populateFrom uses a parameter that's defined by an Enum (enumeration).

Private Enum DataFrom
 InnerText
 GetAttribute
End Enum

This Enum will allow you to explicitly specify what want to use.

Now inside of PopulateTable you check on populateFrom and make decisions based on that. What follows is the refactoring of the code I was able to do.


Cleaned up code.

Option Explicit
Private Enum DataFrom
 InnerText
 GetAttribute
End Enum
Private Const getAttributePopulationRow As Long = 110
Private Const innerTextPopulationRow As Long = 185
Private Const columnOffsetMultiple As Long = 21
Private Const columnOffset As Long = 26
Sub Softãèïåðññûëêè()
 Application.DisplayAlerts = False
 Call mainìàññèâû
 Application.DisplayAlerts = True
End Sub
Private Sub mainìàññèâû()
 Dim sheetNames(1 To 19) As String
 Dim i As Long
 For i = LBound(sheetNames) To UBound(sheetNames)
 sheetNames(i) = "Ëèñò" & i
 Next
 Dim book1 As Workbook
 Set book1 = Workbooks.Open("E:\Super M\Ïðîåêò ñòàâêè\Ïîèñê ðåøåíèÿ\Óñîâ 7\Óñëîâèÿ äëÿ àíäåðäîãîâ6円.xlsm")
 Dim iLoop As Long
 iLoop = -1
 With book1.Worksheets("Ëèñò1").Range("R34:R99")
 Dim r As Range
 For Each r In .Rows
 If r.Value = 1 Then
 iLoop = iLoop + 1
 Dim Ssilka As String
 Ssilka = r.Offset(, -13).Hyperlinks.Item(1).Address
 .Parent.Parent.Worksheets(sheetNames(1)).Activate
 .Parent.Parent.Save
 extractTable Ssilka, book1, iLoop
 End If
 Next r
 End With
 book1.Save
 book1.Close
 Exit Sub
End Sub
Private Sub extractTable(Ssilka As String, book1 As Workbook, iLoop As Long)
 PopulateTable GetAttribute, Ssilka, iLoop, book1
 PopulateTable InnerText, Ssilka, iLoop, book1
 TablePart4 iLoop, book1
End Sub
Private Sub PopulateTable(ByVal populateFrom As DataFrom, ByVal Ssilka As String, ByVal iLoop As Long, ByVal book1 As Workbook)
 ' get page
 Dim oHttp As Object
 Set oHttp = CreateObject("MSXML2.XMLHTTP")
 oHttp.Open "GET", Ssilka, False
 oHttp.Send
 ' cleanup response
 Dim sResponse As String
 sResponse = StrConv(oHttp.responseBody, vbUnicode)
 Set oHttp = Nothing
 sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
 With CreateObject("vbscript.regexp")
 .MultiLine = True
 .Global = True
 .IgnoreCase = False
 .Pattern = "<(script|SCRIPT)[\w\W]+?</1円>"
 sResponse = .Replace(sResponse, "")
 End With
 ' create Document from response
 Dim oDom As Object
 Set oDom = CreateObject("htmlFile")
 oDom.Write sResponse
 DoEvents
 ' table with results, indexes starts with zero
 Dim oTable As Object
 Set oTable = oDom.getelementsbytagname("table")(3)
 DoEvents
 Dim iRows As Long
 iRows = oTable.Rows.Length
 Dim iCols As Long
 iCols = oTable.Rows(1).Cells.Length
 ' first row and first column contain no intresting data
 Dim data()
 ReDim data(1 To iRows - 1, 1 To iCols - 1)
 ' fill in data array
 Dim x As Long
 For x = 1 To iRows - 1
 Dim oRow As Object
 Set oRow = oTable.Rows(x)
 Dim y As Long
 For y = 1 To iCols - 1
 If oRow.Cells(y).Children.Length > 0 Then
 If populateFrom = GetAttribute Then
 data(x, y) = oRow.Cells(y).getelementsbytagname("a")(0).GetAttribute("href")
 ElseIf populateFrom = InnerText Then
 data(x, y) = oRow.Cells(y).InnerText
 End If
 End If
 Next y
 Next x
 Set oRow = Nothing
 Set oTable = Nothing
 Set oDom = Nothing
 ' put data array on worksheet
 Dim startRow As Long
 If populateFrom = GetAttribute Then
 startRow = getAttributePopulationRow
 ElseIf populateFrom = InnerText Then
 startRow = innerTextPopulationRow
 End If
 With book1.ActiveSheet.Cells(startRow, columnOffset + (iLoop * columnOffsetMultiple)).Resize(iRows - 1, iCols - 1)
 .NumberFormat = "@"
 .Value = data
 If populateFrom = GetAttribute Then
 .Replace What:="about:", Replacement:="http://allscores.ru/soccer/"
 End If
 End With
End Sub
Private Sub TablePart4(ByVal iLoop As Long, ByVal book1 As Workbook)
 Const rowOffset As Long = 34
 Dim A As Long
 For A = 0 To 4
 Dim B As Long
 For B = 0 To 65
 Dim Perem1 As String
 Perem1 = book1.ActiveSheet.Cells(getAttributePopulationRow + B, (columnOffset + (iLoop * columnOffsetMultiple)) + A).Value2
 Dim Perem2 As String
 Perem2 = book1.ActiveSheet.Cells(innerTextPopulationRow + B, (columnOffset + (iLoop * columnOffsetMultiple)) + A).Value2
 book1.ActiveSheet.Hyperlinks.Add Anchor:=Cells(rowOffset + B, (columnOffset + (iLoop * columnOffsetMultiple)) + A), Address:=Perem1, TextToDisplay:=Perem2
 Next
 Next
End Sub

This simplifies the code. Now it's your turn to investigate how to speed it up.

answered Nov 13, 2017 at 19:36
\$\endgroup\$
3
  • \$\begingroup\$ Thanks for your work , but your simplifications only wins maximum 5 % of time . I need to win >80 % . And it is real , because >80% of time the program spend on getting data from website , pasting it in Excel ,replace every element , then doing same process with another data , then going throug two tables to create third table .But these processes are not simplified in any way .But this is the main goal . \$\endgroup\$ Commented Nov 14, 2017 at 6:13
  • \$\begingroup\$ There might be ways of improving the execution but without knowing the workbook contents I don't think there's much more that can be done. \$\endgroup\$ Commented Nov 14, 2017 at 17:17
  • \$\begingroup\$ how about recordset ? \$\endgroup\$ Commented Nov 15, 2017 at 5:53

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.