2
\$\begingroup\$

This program reads all files in a folder and subfolders line by line, converts them in a supported character set, replaces all unsupported characters and inserts them in a SQL database. My idea was to insert all CSV files.

It works fine indeed but it takes a long time to insert all the data, especially for files that are larger than 100 MB. How could I improve in this code?

Imports System.IO
Imports System.Text
Imports System.Data
Imports System.Data.Sql
Imports System.Data.SqlClient
Imports System.Text.RegularExpressions
Imports System.Globalization
Public Class SQL_Data_Form
#Region "Felder"
 Private _table As String = ""
#End Region
 Structure SeekZeilen
 Dim erste_Zeile As String()
 Dim _Rest As String()
 End Structure
 Private Sub Form1_Load(sender As System.Object, e As System.EventArgs) Handles MyBase.Load
 Dim conn As New DBUmgebung.cdb
 conn.PruefPasswort("ASW@")
 End Sub
 Public Sub ReplaceChars(ByRef _Rest() As String)
 For i As Integer = 0 To _Rest.Length - 1
 _Rest(i) = _Rest(i).Replace("\t", Constants.vbTab).Replace("\n", Constants.vbLf).
 Replace("\r", Constants.vbCr).Replace("\""", """").Replace("\\", "\")
 Next
 End Sub
 Private Sub btnAusfuehren_Click(sender As System.Object, e As System.EventArgs) Handles btnAusführen.Click
 Dim ListPathFiles As List(Of String) = ListFiles.GetFileList(TextBox1.Text, False)
 For Each filePath As String In ListPathFiles 
 Dim fileInfo As New FileInfo(filePath)
 _table = fileInfo.Name.Replace(fileInfo.Extension, String.Empty)
 Try
 View("connect", 15, "")
 Dim conn As New DBUmgebung.cdb
 View("connection successful", 20, "")
 If Not conn.executeCommand("SELECT * FROM " & _table) Then
 Exit Sub
 End If
 View("table check... ", 25, _table)
 Catch ex As Exception
 MessageBox.Show(ex.Message)
 rtbSql.Focus()
 View("connection closed", 100, "")
 Exit Sub
 End Try
 Dim i As Integer
 Dim r As SeekZeilen
 Using reader As New StreamReader(filePath)
 Dim TextFileTable As DataTable = Nothing
 Dim line As String = reader.ReadLine()
 Me.ParseString(line)
 Dim memStream As New MemoryStream(Encoding.Default.GetBytes(line))
 Using TextFileReader As New Microsoft.VisualBasic.FileIO.TextFieldParser(memStream)
 TextFileReader.TextFieldType = FileIO.FieldType.Delimited
 TextFileReader.SetDelimiters(";") 
 r.erste_Zeile = TextFileReader.ReadFields()
 If TextFileTable Is Nothing Then
 TextFileTable = New DataTable("TextFileTable")
 For i = 0 To r.erste_Zeile.Length - 1
 Dim Column As New DataColumn(r.erste_Zeile(i))
 Column.ReadOnly = True
 TextFileTable.Columns.Add(Column)
 Next
 End If
 DataGridView1.DataSource = TextFileTable
 End Using
 While Not reader.EndOfStream
 line = reader.ReadLine()
 Me.ParseString(line)
 memStream = New MemoryStream(Encoding.Default.GetBytes(line))
 Using TextFileReader As New Microsoft.VisualBasic.FileIO.TextFieldParser(memStream)
 TextFileReader.TextFieldType = FileIO.FieldType.Delimited
 TextFileReader.SetDelimiters(";") 
 DataGridView1.DataSource = TextFileTable
 Try
 r._Rest = TextFileReader.ReadFields()
 ReplaceChars(r._Rest)
 If Not r._Rest Is Nothing Then
 Dim oSQL As New DBUmgebung.cdb.SQL()
 oSQL.init()
 AddParameters(oSQL, r)
 oSQL.ausfuehrenSQL(DBUmgebung.cdb.KSQLCommand.INSERT, _table, "")
 View("insert: ", 35, String.Join(", ", r._Rest))
 Dim dtRow As DataRow = TextFileTable.NewRow
 For i = 0 To r._Rest.Length - 1
 dtRow(i) = r._Rest(i).ToString()
 Next
 TextFileTable.Rows.Add(dtRow)
 DataGridView1.Refresh()
 Application.DoEvents()
 End If
 Catch ex As Microsoft.VisualBasic.FileIO.MalformedLineException
 MsgBox("Error! " & ex.Message & _
 "")
 Catch sqlEx As SqlException
 MessageBox.Show(sqlEx.Message)
 Debug.WriteLine("SQL Error!")
 rtbSql.Focus()
 Exit For
 Catch ex As Exception
 MessageBox.Show(ex.Message)
 rtbSql.Focus()
 Exit For
 End Try
 End Using
 End While
 End Using
 Next
 End Sub
 Private Sub ParseString(ByRef value As String)
 If value.Contains("\"""";") Then
 value = value.Replace("\"""";", "\"""""";")
 value = value.Replace("\""""" & Environment.NewLine, "\""""""" & Environment.NewLine)
 End If
 If value.Contains("\""") Then
 value = value.Replace("\""""", Constants.vbNullChar)
 value = value.Replace("\""", "\""""")
 value = value.Replace(Constants.vbNullChar, "\""""")
 End If
 End Sub
 Private Sub View(Message As String, value As Integer, pathname As String)
 RichTextBox1.AppendText(Message & pathname & Environment.NewLine)
 ProgressBar1.Value = value
 End Sub
 Private Sub TextBox1_MouseDoubleClick(sender As System.Object, e As System.Windows.Forms.MouseEventArgs) Handles TextBox1.MouseDoubleClick
 Dim result As DialogResult = FolderBrowserDialog1.ShowDialog()
 If result = Windows.Forms.DialogResult.OK Then
 TextBox1.Text = FolderBrowserDialog1.SelectedPath
 End If
 End Sub
 Private Sub SQL_Data_Form_FormClosing(sender As System.Object, e As System.Windows.Forms.FormClosingEventArgs) Handles MyBase.FormClosing
 Dim result As DialogResult = MessageBox.Show(Me, "close the program?", "", MessageBoxButtons.YesNo)
 If result = Windows.Forms.DialogResult.Yes Then
 e.Cancel = False
 Else
 e.Cancel = True
 End If
 End Sub
 Private Sub Button2_Click(sender As System.Object, e As System.EventArgs) Handles Button2.Click
 Dim conn As New DBUmgebung.cdb
 View("connect", 15, _table)
 Try
 View("querying...", 50, "")
 conn.executeCommand(rtbSql.Text)
 View("Query successful", 90, "")
 Catch ex As Exception
 MessageBox.Show(ex.Message)
 End Try
 View("connection closed", 100, "")
 End Sub
 Private Sub DataGridView1_CellContentClick(sender As System.Object, e As System.Windows.Forms.DataGridViewCellEventArgs) Handles DataGridView1.CellContentClick
 End Sub
 Private Sub AddParameters(ByRef db As DBUmgebung.cdb.SQL, r As SeekZeilen)
 Try
 Dim dummyDate As Date
 For i As Integer = 0 To r.erste_Zeile.Length - 1
 Dim value As String = ZeichensatzAnpassen(r._Rest(i))
 If value = "" Then
 db.FeldWertChar(r.erste_Zeile(i), DBNull.Value)
 ElseIf value Is Nothing OrElse IsDBNull(value) Then
 db.FeldWertChar(r.erste_Zeile(i), DBNull.Value)
 ElseIf Date.TryParseExact(value, "dd.MM.yyyy", CultureInfo.CurrentCulture, DateTimeStyles.None, dummyDate) Then
 db.FeldWertDate(r.erste_Zeile(i), value)
 ElseIf IsNumeric(value) Then
 If value.Contains(",") Then
 db.FeldWertDecimal(r.erste_Zeile(i), value.Replace(",", "."))
 Else
 db.FeldWertInteger(r.erste_Zeile(i), value)
 End If
 Else
 db.FeldWertChar(r.erste_Zeile(i), value)
 End If
 Next
 Catch ex As Exception
 MessageBox.Show(ex.Message)
 End Try
 End Sub
 Public Shared Function ZeichensatzAnpassen(ByVal input As String) As String
 Dim OriginalCodierung As System.Text.Encoding = System.Text.Encoding.GetEncoding("utf-8")
 Dim iso8859 As System.Text.Encoding = System.Text.Encoding.GetEncoding(1252)
 Dim enc As System.Text.Encoding = System.Text.Encoding.Default
 Dim OriginalBytes As Byte() = OriginalCodierung.GetBytes(input)
 Dim result As Byte() = System.Text.Encoding.Convert(OriginalCodierung, iso8859, OriginalBytes)
 Dim s As String = enc.GetString(result)
 Return s
 End Function
End Class
Public Class ListFiles
 Public Shared Function GetFileList(ByVal Root As String, Optional ByVal SubFolders As Boolean = True) As List(Of String)
 Dim FileList As New List(Of String)
 SeekFiles(Root, FileList, SubFolders)
 Return FileList
 End Function
 Private Shared Sub SeekFiles(ByVal Root As String, ByRef FileArray As List(Of String), ByVal SubFolders As Boolean)
 Try
 Dim Files() As String = System.IO.Directory.GetFiles(Root)
 Dim Folders() As String = System.IO.Directory.GetDirectories(Root)
 For i As Integer = 0 To UBound(Files)
 FileArray.Add(Files(i).ToString)
 Next
 If SubFolders = True Then
 For i As Integer = 0 To UBound(Folders)
 SeekFiles(Folders(i), FileArray, SubFolders)
 Next
 End If
 Catch Ex As Exception
 End Try
 End Sub
End Class
Tunaki
9,3011 gold badge31 silver badges46 bronze badges
asked May 11, 2016 at 8:56
\$\endgroup\$
3
  • \$\begingroup\$ Welcome to CodeReview! I edited a bit your post to clarify the wording. I hope you receive great answers! \$\endgroup\$ Commented May 11, 2016 at 9:30
  • \$\begingroup\$ "it takes a long time to insert all the data" - if the bottleneck is at the database side, consider wrapping all the inserts from a file inside a transaction. Based on the exact database system you are using, it may offer other ways of performing fast bulk-inserts (some can even be provided with the csv file). \$\endgroup\$ Commented May 11, 2016 at 10:44
  • \$\begingroup\$ after research i found out that microsoft text driver is the best solution, it reads much faster and it's integrated in all OS since XP Connection conCSV; sConnectCSV = "jdbc:odbc:Driver={Microsoft Text Driver (*.txt; *.csv)};Dbq=d:\\mmi\\;Extensions=csv;Persist Security Info=False;"; conCSV = DriverManager.getConnection(sConnectCSV,"",""); Statement stmt = conCSV.createStatement(); \$\endgroup\$ Commented May 11, 2016 at 13:36

2 Answers 2

3
\$\begingroup\$

The way you insert the data looks a bit problematic:

  1. You seem to be doing a full table scan just to check that the table exists: If Not conn.executeCommand("SELECT * FROM " & _table) Then Exit Sub
  2. After each line read you refresh the data table in the grid. If you want to display the values in a grid, just display them at the end.
  3. I'm not sure want your ParseString method does. Both VB.Net and CSV use "" for escaping a quote, not \"
  4. There's no need to create a Microsoft.VisualBasic.FileIO.TextFieldParser for each line. It can handle the entire file and also takes care of quotes

    Dim parser as New Microsoft.VisualBasic.FileIO.TextFieldParser(fileName)
    parser.TextFieldType = FieldType.Delimited
    parser.Delimiters = New String(){";"}
    While Not parser.EndOfData
     Dim valuesFromCurrentLine = parser.ReadFields()
     'process the current line
    End While
    
  5. Loading a 100 MB file into a Datatable can have a significant memory-footprint. Consider sending the data one line at a time to the server
  6. Why do want to convert utf-8 text to 1252?
  7. The code for DBUmgebung.cdb.SQL is not provided so I can't comment on that.

At places you seem to be reinventing the wheel. You've created a class ListFiles for recursively retrieving the files from a folder, but .NET already provides this via Directory.GetFiles

Some design considerations:

  1. Don't mix UI code with the processing logic like that. .NET provides the BackgroundWorker
  2. Avoid passing arguments ByRef. String values are immutable so can be just as easily returned. Passing an array ByRef is useless unless you plan to allocate a different array.
  3. Use the .NET standard naming conventions for class/method/variable names.
answered May 11, 2016 at 11:23
\$\endgroup\$
1
  • \$\begingroup\$ yes, i need to convert utf8 because i get conversion error of codepage characters, and DBUmgebung.cdb.SQL ist just a Advantage Database driver \$\endgroup\$ Commented May 11, 2016 at 11:28
1
\$\begingroup\$

I improved the code a little and now the program inserts all lines in one transaction, therefore i use DataTable to save all the lines which are inserted in sql db, but it doesn't really read faster as i expected, is there any better reader than streamreader? for example Microsoft TextFileReaader? here's the code

 Private Sub btnAusfuehren_Click(sender As System.Object, e As System.EventArgs) Handles btnAusführen.Click
 Dim ListPathFiles As List(Of String) = ListFiles.GetFileList(TextBox1.Text, False)
 For Each filePath As String In ListPathFiles
 Dim fileInfo As New FileInfo(filePath)
 _table = fileInfo.Name.Replace(fileInfo.Extension, String.Empty)
 Try
 View("connect", 15, "")
 Dim conn As New DBUmgebung.cdb
 View("connection successful", 20, "")
 If Not conn.executeCommand("SELECT * FROM " & _table) Then
 Exit Sub
 End If
 View("table check... ", 25, _table)
 Catch ex As Exception
 MessageBox.Show(ex.Message)
 rtbSql.Focus()
 View("connection closed", 100, "")
 Exit Sub
 End Try
 Dim i As Integer
 Dim r As SeekZeilen
 Using reader As New StreamReader(filePath)
 Dim cdb As New DBUmgebung.cdb()
 Dim dtInserts As DataTable = cdb.GetDataTable("SELECT TOP 0 * FROM " & _table)
 dtInserts.TableName = _table
 Dim line As String = reader.ReadLine()
 While Not reader.EndOfStream
 line = reader.ReadLine()
 Me.ParseString(line)
 Dim memStream As New MemoryStream(Encoding.Default.GetBytes(line))
 Using TextFileReader As New Microsoft.VisualBasic.FileIO.TextFieldParser(memStream)
 TextFileReader.TextFieldType = FileIO.FieldType.Delimited
 TextFileReader.SetDelimiters(";") 
 DataGridView1.DataSource = dtInserts
 Try
 r._Rest = TextFileReader.ReadFields()
 ReplaceChars(r._Rest)
 If Not r._Rest Is Nothing Then
 View("insert: ", 35, String.Join(", ", r._Rest))
 Dim dtRow As DataRow = dtInserts.NewRow
 For i = 0 To r._Rest.Length - 1
 dtRow(i) = r._Rest(i).ToString()
 Next
 dtInserts.Rows.Add(dtRow)
 DataGridView1.Refresh()
 Application.DoEvents()
 End If
 Catch ex As Microsoft.VisualBasic.FileIO.MalformedLineException
 MsgBox("Error! " & ex.Message & _
 "")
 Catch sqlEx As SqlException
 MessageBox.Show(sqlEx.Message)
 Debug.WriteLine("SQL Error!")
 rtbSql.Focus()
 Exit For
 Catch ex As Exception
 MessageBox.Show(ex.Message)
 rtbSql.Focus()
 Exit For
 End Try
 End Using
 End While
 Dim sql As New DBUmgebung.cdb.SQL()
 sql.ausfuehrenSQLDT(DBUmgebung.cdb.KSQLCommand.INSERT, dtInserts.TableName, String.Empty, dtInserts)
 End Using
 Next
 End Sub
answered May 11, 2016 at 10:44
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.