Log(SortedLetters)
' Program entry point: starts the letter-frequency run and then enters the
' message loop.
Sub AppStart (Args() As String)
' ShowAnswer contains a Wait For, so it returns here before its result is logged.
ShowAnswer
' NOTE(review): presumably required so the process stays alive until the
' pending Wait For in ShowAnswer completes - confirm for this app type.
StartMessageLoop
End Sub
' Runs CountAndSortLetters and logs the list it delivers once it completes.
Sub ShowAnswer
	' Optional timing of the whole round trip:
	'Dim start As Long = DateTime.Now
	Dim Counter As ResumableSub = CountAndSortLetters
	Wait For (Counter) Complete (Ranking As List)
	Log(Ranking)
	'Dim elapsed As Long = DateTime.Now - start
	'Log("Elapsed=" & elapsed & "ms")
End Sub
' Downloads the words_alpha.txt word list, counts how often each lowercase
' letter a-z occurs in it, and delivers the 26 letters ordered by descending
' occurrence count.
'
' Returns (through Wait For ... Complete): a List of single-letter strings.
' When the download fails or yields no text, an initialized EMPTY List is
' returned so the caller can log or iterate it safely.
'
' A file-based variant would take (FilePath As String, FileName As String) and
' load the lines with File.ReadList(FilePath, FileName) instead of HttpJob.
Public Sub CountAndSortLetters As ResumableSub
	' 1. Fetch the whole word list as one string
	Dim words As String
	Dim job As HttpJob
	job.Initialize("", Me)
	job.Download("https://raw.githubusercontent.com/dwyl/english-words/refs/heads/master/words_alpha.txt")
	Wait For (job) JobDone(job As HttpJob)
	If job.Success Then
		words = job.GetString
	End If
	job.Release

	If words = "" Then
		' Nothing to count: hand back an initialized, empty list rather than an
		' uninitialized one.
		Dim NoLetters As List
		NoLetters.Initialize
		Return NoLetters
	End If

	' Split into one entry per line. Any other line-ending character left on a
	' word is ignored by the a-z range check below.
	Dim Lines As List = Regex.Split(CRLF, words)
	' Log("size=" & Lines.Size)
	' Log("first=" & Lines.Get(0))
	' Log("last=" & Lines.Get(Lines.Size - 1))

	' Start benchmark (download time is deliberately excluded)
	Dim start As Long = DateTime.Now

	' 2. One counter per lowercase letter: index 0 = 'a' (97) ... index 25 = 'z' (122)
	Dim Counts(26) As Int

	' 3. Walk every character of every word and tally the lowercase letters
	For Each Word As String In Lines
		Dim Length As Int = Word.Length
		For i = 0 To Length - 1
			Dim CodePoint As Int = Asc(Word.CharAt(i))
			' Only a-z is counted; uppercase letters and any other character are skipped.
			If CodePoint >= 97 And CodePoint <= 122 Then
				Dim Index As Int = CodePoint - 97
				Counts(Index) = Counts(Index) + 1
			End If
		Next
	Next

	' 4. Put the tallies into a ListOfArrays so they can be sorted by column
	Dim LetterTable As ListOfArrays = LOAUtils.CreateEmpty(Array("Letter", "Occurrences", "LetterAndOccurences"))
	For i = 0 To 25
		Dim LetterStr As String = Chr(i + 97)
		LetterTable.AddRow(Array(LetterStr, Counts(i), $"${LetterStr} (${Counts(i)})"$))
	Next
	' Sort by "Occurrences", most frequent first (False = descending)
	LetterTable.Sort("Occurrences", False)

	' Extract the sorted letters as a plain list
	Dim SortedLetters As List = LetterTable.GetColumn("Letter")
	'Dim SortedLetters As List = LetterTable.GetColumn("LetterAndOccurences")

	' End benchmark
	Dim elapsed As Long = DateTime.Now - start
	Log("Elapsed=" & elapsed & "ms")
	Return SortedLetters
End Sub
Elapsed=247ms
(ArrayList) [e, i, a, o, n, s, r, t, l, c, u, p, d, m, h, g, y, b, f, v, k, w, z, x, q, j]
(ArrayList) [e (376455), i (313008), a (295792), o (251596), n (251435), s (250282), r (246143), t (230895), l (194915), c (152980), u (131495), p (113663), d (113192), m (105208), h (92368), g (82627), y (70581), b (63942), f (39238), v (33075), k (26814), w (22407), z (14757), x (10493), q (5883), j (5456)]
' Page start-up: reads words_alpha.txt as raw bytes, tallies the letters a-z
' (upper and lower case folded together) and logs the letters ordered by
' descending count, followed by the elapsed milliseconds.
Private Sub B4XPage_Created (Root1 As B4XView)
	Root = Root1
	Dim marktime As Long = DateTime.Now
	Dim raw() As Byte = File.ReadBytes(File.DirApp, "words_alpha.txt")
	Dim tally(26) As Int
	For Each code As Int In raw
		' Map 'a'..'z' and 'A'..'Z' onto 0..25; every other value ends up outside that range
		Dim slot As Int
		If code >= 97 Then
			slot = code - 97
		Else
			slot = code - 65
		End If
		If slot >= 0 And slot <= 25 Then
			tally(slot) = tally(slot) + 1
		End If
	Next
	Dim table As ListOfArrays = LOAUtils.CreateEmpty(Array("Letter", "Count"))
	For k = 0 To 25
		table.AddRow(Array(Chr(k + 97), tally(k)))
	Next
	table.Sort("Count", False)
	' Reported output: [e, i, a, o, n, s, r, t, l, c, u, p, d, m, h, g, y, b, f, v, k, w, z, x, q, j]
	Log(table.GetColumn("Letter"))
	' Reported timing: 23
	Log(DateTime.Now - marktime)
End Sub
' Map-based variant: reads the word list line by line, tallies every character
' in a Map, then logs the sorted table, the sorted letters and the elapsed
' milliseconds (reported as roughly 200 ms).
Sub Slower
	Dim startedAt As Long = DateTime.Now
	Dim wordList As List = File.ReadList("C:\Users\H\Downloads\words_alpha.txt", "")
	Dim tally As Map = CreateMap()
	For Each entry As String In wordList
		Dim lastPos As Int = entry.Length - 1
		For pos = 0 To lastPos
			Dim ch As Char = entry.CharAt(pos)
			' Characters not seen before start from zero
			Dim seen As Int = tally.GetDefault(ch, 0)
			tally.Put(ch, seen + 1)
		Next
	Next
	Dim table As ListOfArrays = LOAUtils.CreateFromMap(Array("letter", "count"), tally)
	table.Sort("count", False)
	Log(table.ToString(0))
	Dim SortedLetters As List = table.GetColumn("letter")
	Log(SortedLetters)
	Log(DateTime.Now - startedAt)
End Sub
' Byte-array variant: reads the word list as raw bytes, tallies the lowercase
' letters a-z in a fixed 26-slot array, then logs the letters ordered by
' descending count and the elapsed milliseconds (reported as roughly 25 ms).
Sub Fast
	Dim n As Long = DateTime.Now
	Dim data() As Byte = File.ReadBytes("C:\Users\H\Downloads\words_alpha.txt", "")
	Dim letters(26) As Int
	For i = 0 To data.Length - 1
		Dim slot As Int = data(i) - 97
		' Skip line breaks and anything else that is not a-z, so an unexpected
		' byte cannot index outside the 26-slot array.
		If slot < 0 Or slot > 25 Then Continue
		letters(slot) = letters(slot) + 1
	Next
	Dim loa As ListOfArrays = LOAUtils.CreateEmpty(Array("letter", "count"))
	For i = 0 To letters.Length - 1
		loa.AddRow(Array(Chr(97 + i), letters(i)))
	Next
	' Order by count, most frequent first, before extracting the letters;
	' without this the list is simply a..z.
	loa.Sort("count", False)
	Dim SortedLetters As List = loa.GetColumn("letter")
	Log(SortedLetters)
	Log(DateTime.Now - n)
End Sub
' Page start-up: reads words_alpha.txt as a list of lines, tallies the letters
' a-z (upper and lower case folded together) character by character and logs
' the letters ordered by descending count, followed by the elapsed milliseconds.
Private Sub B4XPage_Created (Root1 As B4XView)
	Root = Root1
	Dim marktime As Long = DateTime.Now
	Dim wordList As List = File.ReadList(File.DirApp, "words_alpha.txt")
	Dim tally(26) As Int
	For Each entry As String In wordList
		Dim lastPos As Int = entry.Length - 1
		For pos = 0 To lastPos
			Dim code As Int = Asc(entry.CharAt(pos))
			' Map 'a'..'z' and 'A'..'Z' onto 0..25; every other value ends up outside that range
			Dim slot As Int
			If code >= 97 Then
				slot = code - 97
			Else
				slot = code - 65
			End If
			If slot >= 0 And slot <= 25 Then
				tally(slot) = tally(slot) + 1
			End If
		Next
	Next
	Dim table As ListOfArrays = LOAUtils.CreateEmpty(Array("Letter", "Count"))
	For k = 0 To 25
		table.AddRow(Array(Chr(k + 97), tally(k)))
	Next
	table.Sort("Count", False)
	' Reported output: [e, i, a, o, n, s, r, t, l, c, u, p, d, m, h, g, y, b, f, v, k, w, z, x, q, j]
	Log(table.GetColumn("Letter"))
	' Reported timing: 58 msecs
	Log(DateTime.Now - marktime)
End Sub
' Page start-up: reads words_alpha.txt as one string, converts it to a Char
' array with ByteConverter, tallies the letters a-z (upper and lower case folded
' together) and logs the letters ordered by descending count. Elapsed time is
' logged after the read, after the count and at the end.
Private Sub B4XPage_Created (Root1 As B4XView)
	Root = Root1
	Dim bc As ByteConverter
	Dim marktime As Long = DateTime.Now
	Dim fileText As String = File.ReadString(File.DirApp, "words_alpha.txt")
	Dim chars() As Char = bc.ToChars(fileText)
	' Reported value: 4234910
	Log(chars.Length)
	' Reported: Time since start: 22
	Log("Time since start: " & (DateTime.Now - marktime))
	Dim tally(26) As Int
	For Each ch As Char In chars
		Dim code As Int = Asc(ch)
		' Map 'a'..'z' and 'A'..'Z' onto 0..25; every other value ends up outside that range
		Dim slot As Int
		If code >= 97 Then
			slot = code - 97
		Else
			slot = code - 65
		End If
		If slot >= 0 And slot <= 25 Then
			tally(slot) = tally(slot) + 1
		End If
	Next
	' Reported: Time since start: 34
	Log("Time since start: " & (DateTime.Now - marktime))
	Dim table As ListOfArrays = LOAUtils.CreateEmpty(Array("Letter", "Count"))
	For k = 0 To 25
		table.AddRow(Array(Chr(k + 97), tally(k)))
	Next
	table.Sort("Count", False)
	' Reported output: [e, i, a, o, n, s, r, t, l, c, u, p, d, m, h, g, y, b, f, v, k, w, z, x, q, j]
	Log(table.GetColumn("Letter"))
	' Reported: Time since start: 40
	Log("Time since start: " & (DateTime.Now - marktime))
End Sub
You can shave 18 msecs off the version in post #9 above by using ByteConverter to convert the whole file to an array of Char first.
' Report, for each letter A-Z, the combined uppercase + lowercase tally.
' cnts is indexed by character code here (65.. for uppercase, 97.. for lowercase).
For Letter = 1 To 26
	Dim upperCode As Int = 64 + Letter
	Dim lowerCode As Int = 96 + Letter
	Dim total As Int = cnts(upperCode) + cnts(lowerCode)
	Log("letter " & Chr(upperCode) & " occurs " & total & " times")
Next
' Histogram variant: reads words_alpha.txt as raw bytes, counts every byte
' value in a 256-slot table, then folds the uppercase and lowercase slots of
' each letter together and logs the letters ordered by descending count.
' Elapsed time is logged after each stage.
Private Sub Emexes_Version
	Dim marktime As Long = DateTime.Now
	Dim b() As Byte = File.ReadBytes(File.DirApp, "words_alpha.txt")
	Log("Time since start: " & (DateTime.Now - marktime)) 'Time since start: 5 [TO READ BYTES]
	Dim cnts(256) As Int
	For Each c As Int In b
		' Bytes are signed, so values of 128 and above arrive as negatives;
		' mask to 0..255 so every byte maps to a valid slot.
		Dim slot As Int = Bit.And(c, 255)
		cnts(slot) = cnts(slot) + 1
	Next
	Log("Time since start: " & (DateTime.Now - marktime)) 'Time since start: 10 [5 msecs more to COUNT]
	Dim LOA As ListOfArrays = LOAUtils.CreateEmpty(Array("Letter", "Count"))
	For letter = 1 To 26
		' 64 + letter = uppercase code, 96 + letter = lowercase code
		LOA.AddRow(Array(Chr(letter + 96), cnts(64 + letter) + cnts(96 + letter)))
	Next
	Log("Time since start: " & (DateTime.Now - marktime)) 'Time since start: 12 [2 msecs more to create LOA]
	LOA.Sort("Count", False)
	Log(LOA.GetColumn("Letter")) '(ArrayList) [e, i, a, o, n, s, r, t, l, c, u, p, d, m, h, g, y, b, f, v, k, w, z, x, q, j]
	Log("Time since start: " & (DateTime.Now - marktime)) 'Time since start: 16 [4 msecs more to sort and display LOA]
End Sub
We use cookies and similar technologies for the following purposes:
Do you accept cookies and these technologies?
We use cookies and similar technologies for the following purposes:
Do you accept cookies and these technologies?