'Non-UI application (console / server application)
#Region Project Attributes
#CommandLineArgs:
#MergeLibraries: True
#AdditionalJar: sqlite-jdbc-3.27.2.1
#AdditionalJar: bcprov-jdk15on-1.64
#End Region
'Process-wide state shared by every sub in this module.
Sub Process_Globals
	'Running count of files inserted into the database by ReadDir.
	Public nbFiles As Long
	'Running count of directories encountered by ReadDir.
	Public nbDir As Long
	'Name of the SQLite database file (created under File.DirApp by CreateDB).
	Public FileDB As String = "fileinfo.db"
	'Connection to the file-information database.
	Public sql As SQL
End Sub
'Program entry point: rebuilds the database, scans the requested folders and logs the totals.
'Args - folders to scan. When no argument is supplied the default drive roots
'       i:\, f:\, g:\ and j:\ are scanned.
'All inserts run inside a single transaction; if the scan raises an exception the
'exception is logged and the transaction is rolled back so the connection can still be closed.
Sub AppStart (Args() As String)
	nbFiles = 0
	nbDir = 0
	CreateDB
	' scan one or several folders
	Dim roots() As String = Args
	If roots.Length = 0 Then roots = Array As String("i:\", "f:\", "g:\", "j:\")
	sql.BeginTransaction
	Try
		For Each root As String In roots
			ReadDir(root, True)
		Next
		sql.TransactionSuccessful
	Catch
		Log(LastException)
		sql.Rollback
	End Try
	Log (nbFiles)
	Log (nbDir)
	'Duplicate detection is currently disabled; uncomment to hash and report duplicates.
	'SearchDuplicate
	sql.Close
End Sub
'Handler for uncaught exceptions.
'Returning True hands the exception over to the default exceptions handler.
Sub Application_Error (Error As Exception, StackTrace As String) As Boolean
	Dim useDefaultHandler As Boolean = True
	Return useDefaultHandler
End Sub
'Walks a folder and inserts one row per file into the TFileInfo table.
'folder    - directory to scan.
'recursive - when True, sub-directories are scanned as well.
'Each row holds: file name, lower-cased extension, containing folder, file size and the
'current value of nbFiles. The table's fifth column is md5, so that counter acts as a
'placeholder there until SearchDuplicate replaces it with a real hash.
'Increments the nbFiles / nbDir globals and logs nbFiles every 1000 files.
'A folder that File.ListFiles cannot list is logged and skipped.
Sub ReadDir(folder As String, recursive As Boolean)
	Dim lst As List = File.ListFiles(folder)
	If lst.IsInitialized = False Then
		Log (folder & " : non accessible")
		Return
	End If
	'The statement text is the same for every file, so build it once.
	Dim req As String = "INSERT INTO TFileInfo VALUES (?,?,?, ?, ?)"
	For Each name As String In lst
		If File.IsDirectory(folder, name) Then
			nbDir = nbDir+1
			If recursive Then
				'File.Combine inserts the platform separator only when needed, so a root
				'such as "i:\" no longer yields a doubled backslash in the child path.
				ReadDir(File.Combine(folder, name), recursive)
			End If
		Else
			sql.ExecNonQuery2(req, Array As Object (name, GetFileExt(name).ToLowerCase, folder, File.Size(folder, name), nbFiles))
			nbFiles = nbFiles+1
			If nbFiles Mod 1000 = 0 Then Log (nbFiles)
		End If
	Next
End Sub
'Starts from a fresh database: removes any existing database file in File.DirApp,
'opens a new one through the sql global and creates the TFileInfo table.
Sub CreateDB
	Dim dbDir As String = File.DirApp
	If File.Exists(dbDir, FileDB) Then
		File.Delete(dbDir, FileDB)
	End If
	sql.InitializeSQLite(dbDir, FileDB, True)
	Dim ddl As String = $"CREATE TABLE "TFileInfo"( "filename" TEXT,"extension" TEXT,"folder" TEXT,"filesize" INTEGER,"md5" TEXT)"$
	sql.ExecNonQuery(ddl)
End Sub
'Hashes every file whose (non-zero) size is shared with at least one other file, stores the
'hash in the md5 column, then logs a summary of the duplicate groups.
'Step 1: select the candidate rows (same filesize, filesize <> 0) and compute the hash of
'        each row whose md5 value is not already 32 characters long.
'Step 2: log, for every group size n > 1, how many md5 values occur exactly n times
'        ("n  number-of-groups").
Sub SearchDuplicate
	Dim req As String = $"select rowid, * from tfileinfo
where filesize in (
SELECT filesize
FROM tfileinfo
where (filesize<> 0)
GROUP BY filesize
HAVING COUNT(filesize) > 1 )"$
	Dim count As Int = 0
	Dim Cursor As ResultSet = sql.ExecQuery(req)
	Do While Cursor.NextRow
		Dim rowId As Int = Cursor.GetInt("rowid")
		Dim h As String = Cursor.GetString("md5")
		'A 32-character value is taken to be an already computed hash; anything else is
		'hashed now. The UPDATE is only needed when the value actually changed.
		If h.Length <> 32 Then
			h = Hash(Cursor.GetString("folder"),Cursor.GetString("filename") )
			sql.ExecNonQuery2("Update TFileInfo set md5=? Where rowid=?",Array As Object (h, rowId) )
		End If
		Log(rowId)
		count = count +1
	Loop
	Cursor.Close
	Log ("total duplicate " & count)
	' output a summary of duplicate files
	req = $"Select nuple, count(nuple) as cn from (
Select count(md5) As nuple
FROM tfileinfo
GROUP BY md5
HAVING count(md5) > 1 )
Group By nuple "$
	Cursor = sql.ExecQuery(req)
	Do While Cursor.NextRow
		Log(Cursor.GetString("nuple") & " " & Cursor.GetString("cn"))
	Loop
	'Release the second result set as well; it was previously left open.
	Cursor.Close
End Sub
'Computes the MD5 digest of a file and returns it as a hexadecimal string.
'dir      - folder that contains the file.
'filename - name of the file inside dir.
'Returns the string produced by ByteConverter.HexFromBytes for the 16-byte digest, or the
'marker "TOO BIG" when the file is larger than 1,000,000,000 bytes (such files are logged
'and not read, because the whole file is loaded into memory to be hashed).
Sub Hash (dir As String, filename As String) As String
	'Query the size once instead of once per use.
	Dim size As Long = File.Size(dir, filename)
	If size > 1000000000 Then
		Log ("****** file ignore ****")
		Log (filename & " " & size )
		Return "TOO BIG"
	End If
	'Bit.InputStreamToBytes reads until the end of the stream and closes it afterwards, so
	'the whole file is read even if a single read call returns fewer bytes, and the stream
	'is not left open.
	Dim stream As InputStream = File.OpenInput(dir,filename)
	Dim buffer() As Byte = Bit.InputStreamToBytes(stream)
	Dim md As MessageDigest
	Dim data() As Byte = md.GetMessageDigest(buffer, "MD5")
	Dim Bconv As ByteConverter
	Return Bconv.HexFromBytes(data)
End Sub
'Returns the extension of a file name: the text after the last ".", without the dot.
'FullPath - a file name, optionally preceded by a path.
'Returns "" when the name has no extension, i.e. when there is no dot at all or when the
'last dot belongs to a directory component rather than to the file name.
Sub GetFileExt(FullPath As String) As String
	Dim dot As Int = FullPath.LastIndexOf(".")
	'LastIndexOf returns -1 when there is no dot; without this guard the whole
	'name would be returned as the "extension".
	If dot < 0 Then Return ""
	Dim sep As Int = Max(FullPath.LastIndexOf("\"), FullPath.LastIndexOf("/"))
	If dot < sep Then Return ""
	Return FullPath.SubString(dot + 1)
End Sub