Bigfile: fix piece field bitmask to be used as bytearray consistently (#1982)
* Bigfile: make Piecefield array a bytearray. We want an array of characters; Py2 strings made sense to use as an array of characters, but Py3 strings are different and no longer a good choice. * Bigfile: store bits as binary instead of char. * BigFile: rename to/from string -> to/from bytes, since the type was changed to bytearray.
This commit is contained in:
parent
1516d55a88
commit
ec6fd48b86
4 changed files with 69 additions and 62 deletions
|
@ -2,23 +2,24 @@ import array
|
|||
|
||||
|
||||
def packPiecefield(data):
|
||||
assert isinstance(data, bytes) or isinstance(data, bytearray)
|
||||
res = []
|
||||
if not data:
|
||||
return array.array("H", b"")
|
||||
|
||||
if data[0] == "0":
|
||||
if data[0] == b"\x00":
|
||||
res.append(0)
|
||||
find = "1"
|
||||
find = b"\x01"
|
||||
else:
|
||||
find = "0"
|
||||
find = b"\x00"
|
||||
last_pos = 0
|
||||
pos = 0
|
||||
while 1:
|
||||
pos = data.find(find, pos)
|
||||
if find == "0":
|
||||
find = "1"
|
||||
if find == b"\x00":
|
||||
find = b"\x01"
|
||||
else:
|
||||
find = "0"
|
||||
find = b"\x00"
|
||||
if pos == -1:
|
||||
res.append(len(data) - last_pos)
|
||||
break
|
||||
|
@ -29,31 +30,43 @@ def packPiecefield(data):
|
|||
|
||||
def unpackPiecefield(data):
    """Expand a run-length encoded piecefield back to its binary form.

    data is an iterable of run lengths (typically array.array("H")) of
    alternating runs, always starting with a "piece present" run.
    Returns a bytes object where byte 0x01 means piece present and 0x00
    means piece missing.  Returns b"" for empty input or when any run
    length is implausibly large (corrupt or hostile encoding).
    """
    if not data:
        return b""

    res = []
    char = b"\x01"  # runs alternate, starting with "piece present"
    for times in data:
        if times > 10000:  # sanity limit against corrupt/hostile input
            return b""
        res.append(char * times)
        # Flip between present/missing for the next run
        if char == b"\x01":
            char = b"\x00"
        else:
            char = b"\x01"
    return b"".join(res)
|
||||
|
||||
|
||||
class BigfilePiecefield(object):
|
||||
def spliceBit(data, idx, bit):
    """Return a copy of the piecefield bytes with position idx set to bit.

    bit must be a single byte, b"\\x00" or b"\\x01".  If data is shorter
    than idx, it is right-padded with 0x00 ("missing") bytes so the bit
    can be placed at the requested position.
    """
    assert bit == b"\x00" or bit == b"\x01"
    if len(data) < idx:
        # Grow the field with "missing" markers up to the target position
        data = data.ljust(idx + 1, b"\x00")
    return data[:idx] + bit + data[idx + 1:]
|
||||
|
||||
class Piecefield(object):
    """Base class for piecefield implementations.

    Subclasses must provide tobytes(), returning one byte per piece
    (0x01 = present, 0x00 = missing).
    """

    def tostring(self):
        # Human-readable "10110..." rendering of the binary piecefield
        return "".join(["1" if b else "0" for b in self.tobytes()])
|
||||
|
||||
|
||||
class BigfilePiecefield(Piecefield):
    """Unpacked piecefield: stores one byte per piece (0x00/0x01) directly."""
    __slots__ = ["data"]

    def __init__(self):
        self.data = b""

    def frombytes(self, s):
        """Load the field from raw binary data (renamed from fromstring:
        the field is binary bytes, not text)."""
        assert isinstance(s, bytes) or isinstance(s, bytearray)
        self.data = s

    def tobytes(self):
        """Return the raw binary piecefield (one byte per piece)."""
        return self.data
|
||||
|
||||
def pack(self):
|
||||
|
@ -64,28 +77,24 @@ class BigfilePiecefield(object):
|
|||
|
||||
def __getitem__(self, key):
    """Return the byte value (0 or 1) at piece index key, or False when the
    index is beyond the stored field (treated as "piece missing")."""
    try:
        return self.data[key]
    except IndexError:
        return False
|
||||
|
||||
def __setitem__(self, key, value):
    """Set the piece at index key to value (b"\\x00" or b"\\x01"),
    growing the field with 0x00 bytes if needed."""
    self.data = spliceBit(self.data, key, value)
|
||||
|
||||
|
||||
class BigfilePiecefieldPacked(Piecefield):
    """Packed piecefield: stores the field run-length encoded to save memory."""
    __slots__ = ["data"]

    def __init__(self):
        self.data = b""

    def frombytes(self, data):
        """Load the field from raw binary data and store it run-length
        encoded (renamed from fromstring: the field is binary, not text)."""
        assert isinstance(data, bytes) or isinstance(data, bytearray)
        self.data = packPiecefield(data).tobytes()

    def tobytes(self):
        """Decode the stored run lengths back to one byte per piece."""
        return unpackPiecefield(array.array("H", self.data))
|
||||
def pack(self):
|
||||
|
@ -96,23 +105,20 @@ class BigfilePiecefieldPacked(object):
|
|||
|
||||
def __getitem__(self, key):
    """Return the byte value (0 or 1) at piece index key, or False when the
    index is beyond the stored field (treated as "piece missing")."""
    try:
        # Unpacks the whole field per access; acceptable for occasional reads
        return self.tobytes()[key]
    except IndexError:
        return False
|
||||
|
||||
def __setitem__(self, key, value):
    """Set the piece at index key to value (b"\\x00" or b"\\x01") by
    unpacking, splicing the bit, then re-packing the field."""
    data = spliceBit(self.tobytes(), key, value)
    self.frombytes(data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import os
|
||||
import psutil
|
||||
import time
|
||||
testdata = "1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1"
|
||||
testdata = b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01"
|
||||
meminfo = psutil.Process(os.getpid()).memory_info
|
||||
|
||||
for storage in [BigfilePiecefieldPacked, BigfilePiecefield]:
|
||||
|
@ -122,7 +128,7 @@ if __name__ == "__main__":
|
|||
piecefields = {}
|
||||
for i in range(10000):
|
||||
piecefield = storage()
|
||||
piecefield.fromstring(testdata[:i] + "0" + testdata[i + 1:])
|
||||
piecefield.frombytes(testdata[:i] + b"\x00" + testdata[i + 1:])
|
||||
piecefields[i] = piecefield
|
||||
|
||||
print("Create x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - m) / 1024, time.time() - s, len(piecefields[0].data)))
|
||||
|
|
|
@ -324,7 +324,7 @@ class ContentManagerPlugin(object):
|
|||
# Add the merkle root to hashfield
|
||||
hash_id = self.site.content_manager.hashfield.getHashId(hash)
|
||||
self.optionalDownloaded(inner_path, hash_id, file_size, own=True)
|
||||
self.site.storage.piecefields[hash].fromstring("1" * piece_num)
|
||||
self.site.storage.piecefields[hash].frombytes(b"\x01" * piece_num)
|
||||
|
||||
back[file_relative_path] = {"sha512": hash, "size": file_size, "piecemap": piecemap_relative_path, "piece_size": piece_size}
|
||||
return back
|
||||
|
@ -361,7 +361,7 @@ class ContentManagerPlugin(object):
|
|||
|
||||
# Mark piece downloaded
|
||||
piece_i = int(pos_from / file_info["piece_size"])
|
||||
self.site.storage.piecefields[file_info["sha512"]][piece_i] = True
|
||||
self.site.storage.piecefields[file_info["sha512"]][piece_i] = b"\x01"
|
||||
|
||||
# Only add to site size on first request
|
||||
if hash_id in self.hashfield:
|
||||
|
@ -460,15 +460,16 @@ class SiteStoragePlugin(object):
|
|||
if os.path.isfile(file_path):
|
||||
if sha512 not in self.piecefields:
|
||||
if open(file_path, "rb").read(128) == b"\0" * 128:
|
||||
piece_data = "0"
|
||||
piece_data = b"\x00"
|
||||
else:
|
||||
piece_data = "1"
|
||||
piece_data = b"\x01"
|
||||
self.log.debug("%s: File exists, but not in piecefield. Filling piecefiled with %s * %s." % (inner_path, piece_num, piece_data))
|
||||
self.piecefields[sha512].fromstring(piece_data * piece_num)
|
||||
self.piecefields[sha512].frombytes(piece_data * piece_num)
|
||||
else:
|
||||
self.log.debug("Creating bigfile: %s" % inner_path)
|
||||
self.createSparseFile(inner_path, file_info["size"], sha512)
|
||||
self.piecefields[sha512].fromstring("0" * piece_num)
|
||||
self.piecefields[sha512].frombytes(b"\x00" * piece_num)
|
||||
self.log.debug("Created bigfile: %s" % inner_path)
|
||||
return True
|
||||
|
||||
def openBigfile(self, inner_path, prebuffer=0):
|
||||
|
@ -595,7 +596,7 @@ class WorkerManagerPlugin(object):
|
|||
if not self.site.storage.isFile(inner_path):
|
||||
self.site.storage.createSparseFile(inner_path, file_info["size"], file_info["sha512"])
|
||||
piece_num = int(math.ceil(float(file_info["size"]) / file_info["piece_size"]))
|
||||
self.site.storage.piecefields[file_info["sha512"]].fromstring("0" * piece_num)
|
||||
self.site.storage.piecefields[file_info["sha512"]].frombytes(b"\x00" * piece_num)
|
||||
else:
|
||||
task = super(WorkerManagerPlugin, self).addTask(inner_path, *args, **kwargs)
|
||||
return task
|
||||
|
|
|
@ -137,8 +137,8 @@ class TestBigfile:
|
|||
bad_files = site_temp.storage.verifyFiles(quick_check=True)["bad_files"]
|
||||
assert not bad_files
|
||||
|
||||
# client_piecefield = peer_client.piecefields[file_info["sha512"]].tostring()
|
||||
# assert client_piecefield == "1" * 10
|
||||
# client_piecefield = peer_client.piecefields[file_info["sha512"]].tobytes()
|
||||
# assert client_piecefield == b"\x01" * 10
|
||||
|
||||
# Download 5. and 10. block
|
||||
|
||||
|
@ -187,7 +187,7 @@ class TestBigfile:
|
|||
|
||||
assert set(site_temp.content_manager.hashfield) == set([18343, 43727])
|
||||
|
||||
assert site_temp.storage.piecefields[f.sha512].tostring() == "0000010001"
|
||||
assert site_temp.storage.piecefields[f.sha512].tobytes() == b"\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01"
|
||||
assert f.sha512 in site_temp.getSettingsCache()["piecefields"]
|
||||
|
||||
# Test requesting already downloaded
|
||||
|
@ -219,26 +219,26 @@ class TestBigfile:
|
|||
@pytest.mark.parametrize("piecefield_obj", [BigfilePiecefield, BigfilePiecefieldPacked])
|
||||
def testPiecefield(self, piecefield_obj, site):
|
||||
testdatas = [
|
||||
"1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1",
|
||||
"010101" * 10 + "01" * 90 + "10" * 400 + "0" * 4999,
|
||||
"1" * 10000,
|
||||
"0" * 10000
|
||||
b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01",
|
||||
b"\x00\x01\x00\x01\x00\x01" * 10 + b"\x00\x01" * 90 + b"\x01\x00" * 400 + b"\x00" * 4999,
|
||||
b"\x01" * 10000,
|
||||
b"\x00" * 10000
|
||||
]
|
||||
for testdata in testdatas:
|
||||
piecefield = piecefield_obj()
|
||||
|
||||
piecefield.fromstring(testdata)
|
||||
assert piecefield.tostring() == testdata
|
||||
assert piecefield[0] == int(testdata[0])
|
||||
assert piecefield[100] == int(testdata[100])
|
||||
assert piecefield[1000] == int(testdata[1000])
|
||||
assert piecefield[len(testdata) - 1] == int(testdata[len(testdata) - 1])
|
||||
piecefield.frombytes(testdata)
|
||||
assert piecefield.tobytes() == testdata
|
||||
assert piecefield[0] == testdata[0]
|
||||
assert piecefield[100] == testdata[100]
|
||||
assert piecefield[1000] == testdata[1000]
|
||||
assert piecefield[len(testdata) - 1] == testdata[len(testdata) - 1]
|
||||
|
||||
packed = piecefield.pack()
|
||||
piecefield_new = piecefield_obj()
|
||||
piecefield_new.unpack(packed)
|
||||
assert piecefield.tostring() == piecefield_new.tostring()
|
||||
assert piecefield_new.tostring() == testdata
|
||||
assert piecefield.tobytes() == piecefield_new.tobytes()
|
||||
assert piecefield_new.tobytes() == testdata
|
||||
|
||||
def testFileGet(self, file_server, site, site_temp):
|
||||
inner_path = self.createBigfile(site)
|
||||
|
@ -345,7 +345,7 @@ class TestBigfile:
|
|||
# Create 10 fake peer for each piece
|
||||
for i in range(10):
|
||||
peer = Peer(file_server.ip, 1544, site_temp, server2)
|
||||
peer.piecefields[sha512][i] = "1"
|
||||
peer.piecefields[sha512][i] = b"\x01"
|
||||
peer.updateHashfield = mock.MagicMock(return_value=False)
|
||||
peer.updatePiecefields = mock.MagicMock(return_value=False)
|
||||
peer.findHashIds = mock.MagicMock(return_value={"nope": []})
|
||||
|
@ -430,7 +430,7 @@ class TestBigfile:
|
|||
time.sleep(0.5) # Wait prebuffer download
|
||||
|
||||
sha512 = site.content_manager.getFileInfo(inner_path)["sha512"]
|
||||
assert site_temp.storage.piecefields[sha512].tostring() == "0000011100"
|
||||
assert site_temp.storage.piecefields[sha512].tobytes() == b"\x00\x00\x00\x00\x00\x01\x01\x01\x00\x00"
|
||||
|
||||
# No prebuffer beyond end of the file
|
||||
f.seek(9 * 1024 * 1024)
|
||||
|
|
|
@ -60,13 +60,13 @@ class UiWebsocketPlugin(object):
|
|||
bigfile_sha512_cache[file_key] = sha512
|
||||
|
||||
if sha512 in site.storage.piecefields:
|
||||
piecefield = site.storage.piecefields[sha512].tostring()
|
||||
piecefield = site.storage.piecefields[sha512].tobytes()
|
||||
else:
|
||||
piecefield = None
|
||||
|
||||
if piecefield:
|
||||
row["pieces"] = len(piecefield)
|
||||
row["pieces_downloaded"] = piecefield.count("1")
|
||||
row["pieces_downloaded"] = piecefield.count(b"\x01")
|
||||
row["downloaded_percent"] = 100 * row["pieces_downloaded"] / row["pieces"]
|
||||
if row["pieces_downloaded"]:
|
||||
if row["pieces"] == row["pieces_downloaded"]:
|
||||
|
@ -86,10 +86,10 @@ class UiWebsocketPlugin(object):
|
|||
for peer in site.peers.values():
|
||||
if not peer.time_piecefields_updated or sha512 not in peer.piecefields:
|
||||
continue
|
||||
peer_piecefield = peer.piecefields[sha512].tostring()
|
||||
peer_piecefield = peer.piecefields[sha512].tobytes()
|
||||
if not peer_piecefield:
|
||||
continue
|
||||
if peer_piecefield == "1" * len(peer_piecefield):
|
||||
if peer_piecefield == b"\x01" * len(peer_piecefield):
|
||||
row["peer_seed"] += 1
|
||||
else:
|
||||
row["peer_leech"] += 1
|
||||
|
|
Loading…
Reference in a new issue