RT,今天测试了200000组数据,随机值为区间范围[200,720]的随机整数,测试结果如图:
代码如下:
Sub 命令式去重复()
Dim arr, r As Long, t As Single
t = Timer
r = Cells(Rows.Count, 1).End(xlUp).Row
arr = Range("a3:a" & r)
Range("b2:b" & r).ClearContents
[b3].Resize(UBound(arr)) = arr
Range("b3:b" & r).RemoveDuplicates _
Columns:=1, Header:=xlNo
[b2] = Format(Timer - t, "0.0000秒")
End Sub
Sub 字典法去重复()
Dim arr, dic_keys
Dim r As Long
Dim t As Single
Dim dic As Object
t = Timer
Set dic = CreateObject("Scripting.Dictionary")
r = Cells(Rows.Count, 1).End(xlUp).Row
arr = Range("a3:a" & r)
For i = 1 To UBound(arr)
dic(arr(i, 1)) = ""
Next
dic_keys = dic.Keys
ReDim brr(1 To dic.Count, 1 To 1)
For i = 1 To dic.Count
brr(i, 1) = dic_keys(i - 1)
Next
Range("c2:c" & r).ClearContents
[c3].Resize(dic.Count) = brr
[c2] = Format(Timer - t, "0.0000秒")
End Sub
Sub 数组法去重复()
Dim arr, brr, tmp
Dim i As Long, n As Long
Dim r As Long, t As Single
t = Timer
r = Cells(Rows.Count, 1).End(xlUp).Row
arr = Range("a3:a" & r)
ReDim brr(1 To UBound(arr), 1 To 1)
For i = 1 To UBound(arr)
tmp = arr(i, 1)
For j = 1 To i
If brr(j, 1) = tmp Then GoTo line
Next
n = n + 1
brr(n, 1) = tmp
line:
Next
Range("d2:d" & r).ClearContents
[d3].Resize(n) = brr
[d2] = Format(Timer - t, "0.0000秒")
End Sub
Sub 标记法去重复()
Dim arr, brr, tmp
Dim i As Long, found As Long
Dim t As Single, r As Long
t = Timer
r = Cells(Rows.Count, 1).End(xlUp).Row
arr = Range("a3:a" & r)
ReDim brr(1 To UBound(arr), 1 To 1)
For i = 1 To UBound(arr)
tmp = arr(i, 1)
For j = 1 To i
If arr(j, 1) = tmp Then
found = found + 1
End If
Next
If found = 1 Then
n = n + 1
brr(n, 1) = tmp
End If
found = 0
Next
Range("e2:e" & r).ClearContents
[e3].Resize(n) = brr
[e2] = Format(Timer - t, "0.0000秒")
End Sub
Sub 生成随机数()
Dim i As Long, arr(1 To 200000, 1 To 1)
For i = 1 To 200000
Randomize
arr(i, 1) = 520 * Rnd \ 1 + 200
Next
[a3:a200002].ClearContents
[a3].Resize(200000) = arr
End Sub
目前看来当关键字很少,字典取重复效率最高。那么问题来了,如果不用字典,请问代码怎么优化?
代码如下:
Sub 命令式去重复()
Dim arr, r As Long, t As Single
t = Timer
r = Cells(Rows.Count, 1).End(xlUp).Row
arr = Range("a3:a" & r)
Range("b2:b" & r).ClearContents
[b3].Resize(UBound(arr)) = arr
Range("b3:b" & r).RemoveDuplicates _
Columns:=1, Header:=xlNo
[b2] = Format(Timer - t, "0.0000秒")
End Sub
Sub 字典法去重复()
Dim arr, dic_keys
Dim r As Long
Dim t As Single
Dim dic As Object
t = Timer
Set dic = CreateObject("Scripting.Dictionary")
r = Cells(Rows.Count, 1).End(xlUp).Row
arr = Range("a3:a" & r)
For i = 1 To UBound(arr)
dic(arr(i, 1)) = ""
Next
dic_keys = dic.Keys
ReDim brr(1 To dic.Count, 1 To 1)
For i = 1 To dic.Count
brr(i, 1) = dic_keys(i - 1)
Next
Range("c2:c" & r).ClearContents
[c3].Resize(dic.Count) = brr
[c2] = Format(Timer - t, "0.0000秒")
End Sub
Sub 数组法去重复()
Dim arr, brr, tmp
Dim i As Long, n As Long
Dim r As Long, t As Single
t = Timer
r = Cells(Rows.Count, 1).End(xlUp).Row
arr = Range("a3:a" & r)
ReDim brr(1 To UBound(arr), 1 To 1)
For i = 1 To UBound(arr)
tmp = arr(i, 1)
For j = 1 To i
If brr(j, 1) = tmp Then GoTo line
Next
n = n + 1
brr(n, 1) = tmp
line:
Next
Range("d2:d" & r).ClearContents
[d3].Resize(n) = brr
[d2] = Format(Timer - t, "0.0000秒")
End Sub
Sub 标记法去重复()
Dim arr, brr, tmp
Dim i As Long, found As Long
Dim t As Single, r As Long
t = Timer
r = Cells(Rows.Count, 1).End(xlUp).Row
arr = Range("a3:a" & r)
ReDim brr(1 To UBound(arr), 1 To 1)
For i = 1 To UBound(arr)
tmp = arr(i, 1)
For j = 1 To i
If arr(j, 1) = tmp Then
found = found + 1
End If
Next
If found = 1 Then
n = n + 1
brr(n, 1) = tmp
End If
found = 0
Next
Range("e2:e" & r).ClearContents
[e3].Resize(n) = brr
[e2] = Format(Timer - t, "0.0000秒")
End Sub
Sub 生成随机数()
Dim i As Long, arr(1 To 200000, 1 To 1)
For i = 1 To 200000
Randomize
arr(i, 1) = 520 * Rnd \ 1 + 200
Next
[a3:a200002].ClearContents
[a3].Resize(200000) = arr
End Sub
目前看来当关键字很少,字典取重复效率最高。那么问题来了,如果不用字典,请问代码怎么优化?