1
- """Performance data streaming performance"""
1
+ """Performance tests for data streaming."""
2
+
2
3
import os
3
4
import subprocess
4
5
import sys
15
16
16
17
17
18
class TestObjDBPerformance (TestBigRepoR ):
18
- large_data_size_bytes = 1000 * 1000 * 10 # some MiB should do it
19
- moderate_data_size_bytes = 1000 * 1000 * 1 # just 1 MiB
19
+ large_data_size_bytes = 1000 * 1000 * 10 # Some MiB should do it.
20
+ moderate_data_size_bytes = 1000 * 1000 * 1 # Just 1 MiB.
20
21
21
22
@with_rw_repo ("HEAD" , bare = True )
22
23
def test_large_data_streaming (self , rwrepo ):
23
- # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
24
- # It should be shared if possible
24
+ # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream.
25
+ # It should be shared if possible.
25
26
ldb = LooseObjectDB (osp .join (rwrepo .git_dir , "objects" ))
26
27
27
28
for randomize in range (2 ):
@@ -32,7 +33,7 @@ def test_large_data_streaming(self, rwrepo):
32
33
elapsed = time () - st
33
34
print ("Done (in %f s)" % elapsed , file = sys .stderr )
34
35
35
- # writing - due to the compression it will seem faster than it is
36
+ # Writing - due to the compression it will seem faster than it is.
36
37
st = time ()
37
38
binsha = ldb .store (IStream ("blob" , size , stream )).binsha
38
39
elapsed_add = time () - st
@@ -45,7 +46,7 @@ def test_large_data_streaming(self, rwrepo):
45
46
msg %= (size_kib , fsize_kib , desc , elapsed_add , size_kib / elapsed_add )
46
47
print (msg , file = sys .stderr )
47
48
48
- # reading all at once
49
+ # Reading all at once.
49
50
st = time ()
50
51
ostream = ldb .stream (binsha )
51
52
shadata = ostream .read ()
@@ -57,7 +58,7 @@ def test_large_data_streaming(self, rwrepo):
57
58
msg %= (size_kib , desc , elapsed_readall , size_kib / elapsed_readall )
58
59
print (msg , file = sys .stderr )
59
60
60
- # reading in chunks of 1 MiB
61
+ # Reading in chunks (cs = 512 * 1000 bytes, i.e. 512 kB rather than 1 MiB).
61
62
cs = 512 * 1000
62
63
chunks = []
63
64
st = time ()
@@ -86,7 +87,7 @@ def test_large_data_streaming(self, rwrepo):
86
87
file = sys .stderr ,
87
88
)
88
89
89
- # del db file so git has something to do
90
+ # Delete the db file so git has something to do.
90
91
ostream = None
91
92
import gc
92
93
@@ -95,34 +96,34 @@ def test_large_data_streaming(self, rwrepo):
95
96
96
97
# VS. CGIT
97
98
##########
98
- # CGIT ! Can using the cgit programs be faster ?
99
+ # CGIT! Can using the cgit programs be faster?
99
100
proc = rwrepo .git .hash_object ("-w" , "--stdin" , as_process = True , istream = subprocess .PIPE )
100
101
101
- # write file - pump everything in at once to be a fast as possible
102
- data = stream .getvalue () # cache it
102
+ # Write file - pump everything in at once to be as fast as possible.
103
+ data = stream .getvalue () # Cache it.
103
104
st = time ()
104
105
proc .stdin .write (data )
105
106
proc .stdin .close ()
106
107
gitsha = proc .stdout .read ().strip ()
107
108
proc .wait ()
108
109
gelapsed_add = time () - st
109
110
del data
110
- assert gitsha == bin_to_hex (binsha ) # we do it the same way, right ?
111
+ assert gitsha == bin_to_hex (binsha ) # We do it the same way, right?
111
112
112
- # as its the same sha, we reuse our path
113
+ # As it's the same sha, we reuse our path.
113
114
fsize_kib = osp .getsize (db_file ) / 1000
114
115
msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
115
116
msg %= (size_kib , fsize_kib , desc , gelapsed_add , size_kib / gelapsed_add )
116
117
print (msg , file = sys .stderr )
117
118
118
- # compare .. .
119
+ # Compare.
119
120
print (
120
121
"Git-Python is %f %% faster than git when adding big %s files"
121
122
% (100.0 - (elapsed_add / gelapsed_add ) * 100 , desc ),
122
123
file = sys .stderr ,
123
124
)
124
125
125
- # read all
126
+ # Read all.
126
127
st = time ()
127
128
_hexsha , _typename , size , data = rwrepo .git .get_object_data (gitsha )
128
129
gelapsed_readall = time () - st
@@ -132,14 +133,14 @@ def test_large_data_streaming(self, rwrepo):
132
133
file = sys .stderr ,
133
134
)
134
135
135
- # compare
136
+ # Compare.
136
137
print (
137
138
"Git-Python is %f %% faster than git when reading big %sfiles"
138
139
% (100.0 - (elapsed_readall / gelapsed_readall ) * 100 , desc ),
139
140
file = sys .stderr ,
140
141
)
141
142
142
- # read chunks
143
+ # Read chunks.
143
144
st = time ()
144
145
_hexsha , _typename , size , stream = rwrepo .git .stream_object_data (gitsha )
145
146
while True :
@@ -158,7 +159,7 @@ def test_large_data_streaming(self, rwrepo):
158
159
)
159
160
print (msg , file = sys .stderr )
160
161
161
- # compare
162
+ # Compare.
162
163
print (
163
164
"Git-Python is %f %% faster than git when reading big %s files in chunks"
164
165
% (100.0 - (elapsed_readchunks / gelapsed_readchunks ) * 100 , desc ),
0 commit comments