Bigfile: fix piece field bitmask to be used as bytearray consistently (#1982)
* Bigfile: make Piecefield array a bytearray

  We want an array of characters. Py2 strings made sense to use as an array of characters, but Py3 strings are different and no longer a good choice.

* Bigfile: store bits as binary instead of char

* BigFile: rename to/from string -> to/from bytes

  Since the type was changed to bytearray.
This commit is contained in:
parent
1516d55a88
commit
ec6fd48b86
4 changed files with 69 additions and 62 deletions
|
@ -2,23 +2,24 @@ import array
|
||||||
|
|
||||||
|
|
||||||
def packPiecefield(data):
|
def packPiecefield(data):
|
||||||
|
assert isinstance(data, bytes) or isinstance(data, bytearray)
|
||||||
res = []
|
res = []
|
||||||
if not data:
|
if not data:
|
||||||
return array.array("H", b"")
|
return array.array("H", b"")
|
||||||
|
|
||||||
if data[0] == "0":
|
if data[0] == b"\x00":
|
||||||
res.append(0)
|
res.append(0)
|
||||||
find = "1"
|
find = b"\x01"
|
||||||
else:
|
else:
|
||||||
find = "0"
|
find = b"\x00"
|
||||||
last_pos = 0
|
last_pos = 0
|
||||||
pos = 0
|
pos = 0
|
||||||
while 1:
|
while 1:
|
||||||
pos = data.find(find, pos)
|
pos = data.find(find, pos)
|
||||||
if find == "0":
|
if find == b"\x00":
|
||||||
find = "1"
|
find = b"\x01"
|
||||||
else:
|
else:
|
||||||
find = "0"
|
find = b"\x00"
|
||||||
if pos == -1:
|
if pos == -1:
|
||||||
res.append(len(data) - last_pos)
|
res.append(len(data) - last_pos)
|
||||||
break
|
break
|
||||||
|
@ -29,31 +30,43 @@ def packPiecefield(data):
|
||||||
|
|
||||||
def unpackPiecefield(data):
|
def unpackPiecefield(data):
|
||||||
if not data:
|
if not data:
|
||||||
return ""
|
return b""
|
||||||
|
|
||||||
res = []
|
res = []
|
||||||
char = "1"
|
char = b"\x01"
|
||||||
for times in data:
|
for times in data:
|
||||||
if times > 10000:
|
if times > 10000:
|
||||||
return ""
|
return b""
|
||||||
res.append(char * times)
|
res.append(char * times)
|
||||||
if char == "1":
|
if char == b"\x01":
|
||||||
char = "0"
|
char = b"\x00"
|
||||||
else:
|
else:
|
||||||
char = "1"
|
char = b"\x01"
|
||||||
return "".join(res)
|
return b"".join(res)
|
||||||
|
|
||||||
|
|
||||||
class BigfilePiecefield(object):
|
def spliceBit(data, idx, bit):
    r"""Return a copy of ``data`` with the byte at position ``idx`` set to ``bit``.

    Args:
        data: bytes or bytearray to splice into; it is not modified in place.
        idx: zero-based position to overwrite. If ``data`` is shorter than
            ``idx``, the gap is filled with ``b"\x00"`` bytes first.
        bit: the single byte to store, either ``b"\x00"`` or ``b"\x01"``.

    Returns:
        A new sequence (bytes for bytes input, bytearray for bytearray input)
        that is at least ``idx + 1`` bytes long.

    Raises:
        AssertionError: if ``bit`` is neither ``b"\x00"`` nor ``b"\x01"``.
        IndexError: if ``idx`` is negative.
    """
    # Explicit raise instead of an ``assert`` statement: asserts are removed
    # under ``python -O``, which would let an invalid bit through unnoticed.
    # AssertionError is kept so the failure type stays the same.
    if bit not in (b"\x00", b"\x01"):
        raise AssertionError("bit must be b'\\x00' or b'\\x01', got %r" % (bit,))

    # A negative index would make the two slices below overlap and return a
    # corrupted, longer field, so reject it up front.
    if idx < 0:
        raise IndexError("piece index must not be negative: %r" % (idx,))

    # ljust() leaves data that is already long enough untouched; otherwise it
    # zero-fills up to (but not including) the position being written.
    padded = data.ljust(idx, b"\x00")
    return padded[:idx] + bit + padded[idx + 1:]
|
||||||
|
|
||||||
|
class Piecefield(object):
    """Base class adding a text rendering on top of ``tobytes()``.

    The class itself does not define ``tobytes()``; ``tostring()`` calls it
    on ``self``, so concrete piece field classes have to provide it.
    """

    def tostring(self):
        """Return one character per item of ``self.tobytes()``: "1" if the item is truthy, else "0"."""
        digits = []
        for piece in self.tobytes():
            if piece:
                digits.append("1")
            else:
                digits.append("0")
        return "".join(digits)
|
||||||
|
|
||||||
|
|
||||||
|
class BigfilePiecefield(Piecefield):
|
||||||
__slots__ = ["data"]
|
__slots__ = ["data"]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.data = b""
|
self.data = b""
|
||||||
|
|
||||||
def fromstring(self, s):
|
def frombytes(self, s):
|
||||||
|
assert isinstance(s, bytes) or isinstance(s, bytearray)
|
||||||
self.data = s
|
self.data = s
|
||||||
|
|
||||||
def tostring(self):
|
def tobytes(self):
|
||||||
return self.data
|
return self.data
|
||||||
|
|
||||||
def pack(self):
|
def pack(self):
|
||||||
|
@ -64,28 +77,24 @@ class BigfilePiecefield(object):
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
try:
|
try:
|
||||||
return int(self.data[key])
|
return self.data[key]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
def __setitem__(self, key, value):
|
||||||
data = self.data
|
self.data = spliceBit(self.data, key, value)
|
||||||
if len(data) < key:
|
|
||||||
data = data.ljust(key + 1, "0")
|
|
||||||
data = data[:key] + str(int(value)) + data[key + 1:]
|
|
||||||
self.data = data
|
|
||||||
|
|
||||||
|
class BigfilePiecefieldPacked(Piecefield):
|
||||||
class BigfilePiecefieldPacked(object):
|
|
||||||
__slots__ = ["data"]
|
__slots__ = ["data"]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.data = b""
|
self.data = b""
|
||||||
|
|
||||||
def fromstring(self, data):
|
def frombytes(self, data):
|
||||||
|
assert isinstance(data, bytes) or isinstance(data, bytearray)
|
||||||
self.data = packPiecefield(data).tobytes()
|
self.data = packPiecefield(data).tobytes()
|
||||||
|
|
||||||
def tostring(self):
|
def tobytes(self):
|
||||||
return unpackPiecefield(array.array("H", self.data))
|
return unpackPiecefield(array.array("H", self.data))
|
||||||
|
|
||||||
def pack(self):
|
def pack(self):
|
||||||
|
@ -96,23 +105,20 @@ class BigfilePiecefieldPacked(object):
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
try:
|
try:
|
||||||
return int(self.tostring()[key])
|
return self.tobytes()[key]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def __setitem__(self, key, value):
|
def __setitem__(self, key, value):
|
||||||
data = self.tostring()
|
data = spliceBit(self.tobytes(), key, value)
|
||||||
if len(data) < key:
|
self.frombytes(data)
|
||||||
data = data.ljust(key + 1, "0")
|
|
||||||
data = data[:key] + str(int(value)) + data[key + 1:]
|
|
||||||
self.fromstring(data)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import os
|
import os
|
||||||
import psutil
|
import psutil
|
||||||
import time
|
import time
|
||||||
testdata = "1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1"
|
testdata = b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01"
|
||||||
meminfo = psutil.Process(os.getpid()).memory_info
|
meminfo = psutil.Process(os.getpid()).memory_info
|
||||||
|
|
||||||
for storage in [BigfilePiecefieldPacked, BigfilePiecefield]:
|
for storage in [BigfilePiecefieldPacked, BigfilePiecefield]:
|
||||||
|
@ -122,7 +128,7 @@ if __name__ == "__main__":
|
||||||
piecefields = {}
|
piecefields = {}
|
||||||
for i in range(10000):
|
for i in range(10000):
|
||||||
piecefield = storage()
|
piecefield = storage()
|
||||||
piecefield.fromstring(testdata[:i] + "0" + testdata[i + 1:])
|
piecefield.frombytes(testdata[:i] + b"\x00" + testdata[i + 1:])
|
||||||
piecefields[i] = piecefield
|
piecefields[i] = piecefield
|
||||||
|
|
||||||
print("Create x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - m) / 1024, time.time() - s, len(piecefields[0].data)))
|
print("Create x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - m) / 1024, time.time() - s, len(piecefields[0].data)))
|
||||||
|
|
|
@ -324,7 +324,7 @@ class ContentManagerPlugin(object):
|
||||||
# Add the merkle root to hashfield
|
# Add the merkle root to hashfield
|
||||||
hash_id = self.site.content_manager.hashfield.getHashId(hash)
|
hash_id = self.site.content_manager.hashfield.getHashId(hash)
|
||||||
self.optionalDownloaded(inner_path, hash_id, file_size, own=True)
|
self.optionalDownloaded(inner_path, hash_id, file_size, own=True)
|
||||||
self.site.storage.piecefields[hash].fromstring("1" * piece_num)
|
self.site.storage.piecefields[hash].frombytes(b"\x01" * piece_num)
|
||||||
|
|
||||||
back[file_relative_path] = {"sha512": hash, "size": file_size, "piecemap": piecemap_relative_path, "piece_size": piece_size}
|
back[file_relative_path] = {"sha512": hash, "size": file_size, "piecemap": piecemap_relative_path, "piece_size": piece_size}
|
||||||
return back
|
return back
|
||||||
|
@ -361,7 +361,7 @@ class ContentManagerPlugin(object):
|
||||||
|
|
||||||
# Mark piece downloaded
|
# Mark piece downloaded
|
||||||
piece_i = int(pos_from / file_info["piece_size"])
|
piece_i = int(pos_from / file_info["piece_size"])
|
||||||
self.site.storage.piecefields[file_info["sha512"]][piece_i] = True
|
self.site.storage.piecefields[file_info["sha512"]][piece_i] = b"\x01"
|
||||||
|
|
||||||
# Only add to site size on first request
|
# Only add to site size on first request
|
||||||
if hash_id in self.hashfield:
|
if hash_id in self.hashfield:
|
||||||
|
@ -460,15 +460,16 @@ class SiteStoragePlugin(object):
|
||||||
if os.path.isfile(file_path):
|
if os.path.isfile(file_path):
|
||||||
if sha512 not in self.piecefields:
|
if sha512 not in self.piecefields:
|
||||||
if open(file_path, "rb").read(128) == b"\0" * 128:
|
if open(file_path, "rb").read(128) == b"\0" * 128:
|
||||||
piece_data = "0"
|
piece_data = b"\x00"
|
||||||
else:
|
else:
|
||||||
piece_data = "1"
|
piece_data = b"\x01"
|
||||||
self.log.debug("%s: File exists, but not in piecefield. Filling piecefiled with %s * %s." % (inner_path, piece_num, piece_data))
|
self.log.debug("%s: File exists, but not in piecefield. Filling piecefiled with %s * %s." % (inner_path, piece_num, piece_data))
|
||||||
self.piecefields[sha512].fromstring(piece_data * piece_num)
|
self.piecefields[sha512].frombytes(piece_data * piece_num)
|
||||||
else:
|
else:
|
||||||
self.log.debug("Creating bigfile: %s" % inner_path)
|
self.log.debug("Creating bigfile: %s" % inner_path)
|
||||||
self.createSparseFile(inner_path, file_info["size"], sha512)
|
self.createSparseFile(inner_path, file_info["size"], sha512)
|
||||||
self.piecefields[sha512].fromstring("0" * piece_num)
|
self.piecefields[sha512].frombytes(b"\x00" * piece_num)
|
||||||
|
self.log.debug("Created bigfile: %s" % inner_path)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def openBigfile(self, inner_path, prebuffer=0):
|
def openBigfile(self, inner_path, prebuffer=0):
|
||||||
|
@ -595,7 +596,7 @@ class WorkerManagerPlugin(object):
|
||||||
if not self.site.storage.isFile(inner_path):
|
if not self.site.storage.isFile(inner_path):
|
||||||
self.site.storage.createSparseFile(inner_path, file_info["size"], file_info["sha512"])
|
self.site.storage.createSparseFile(inner_path, file_info["size"], file_info["sha512"])
|
||||||
piece_num = int(math.ceil(float(file_info["size"]) / file_info["piece_size"]))
|
piece_num = int(math.ceil(float(file_info["size"]) / file_info["piece_size"]))
|
||||||
self.site.storage.piecefields[file_info["sha512"]].fromstring("0" * piece_num)
|
self.site.storage.piecefields[file_info["sha512"]].frombytes(b"\x00" * piece_num)
|
||||||
else:
|
else:
|
||||||
task = super(WorkerManagerPlugin, self).addTask(inner_path, *args, **kwargs)
|
task = super(WorkerManagerPlugin, self).addTask(inner_path, *args, **kwargs)
|
||||||
return task
|
return task
|
||||||
|
|
|
@ -137,8 +137,8 @@ class TestBigfile:
|
||||||
bad_files = site_temp.storage.verifyFiles(quick_check=True)["bad_files"]
|
bad_files = site_temp.storage.verifyFiles(quick_check=True)["bad_files"]
|
||||||
assert not bad_files
|
assert not bad_files
|
||||||
|
|
||||||
# client_piecefield = peer_client.piecefields[file_info["sha512"]].tostring()
|
# client_piecefield = peer_client.piecefields[file_info["sha512"]].tobytes()
|
||||||
# assert client_piecefield == "1" * 10
|
# assert client_piecefield == b"\x01" * 10
|
||||||
|
|
||||||
# Download 5. and 10. block
|
# Download 5. and 10. block
|
||||||
|
|
||||||
|
@ -187,7 +187,7 @@ class TestBigfile:
|
||||||
|
|
||||||
assert set(site_temp.content_manager.hashfield) == set([18343, 43727])
|
assert set(site_temp.content_manager.hashfield) == set([18343, 43727])
|
||||||
|
|
||||||
assert site_temp.storage.piecefields[f.sha512].tostring() == "0000010001"
|
assert site_temp.storage.piecefields[f.sha512].tobytes() == b"\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01"
|
||||||
assert f.sha512 in site_temp.getSettingsCache()["piecefields"]
|
assert f.sha512 in site_temp.getSettingsCache()["piecefields"]
|
||||||
|
|
||||||
# Test requesting already downloaded
|
# Test requesting already downloaded
|
||||||
|
@ -219,26 +219,26 @@ class TestBigfile:
|
||||||
@pytest.mark.parametrize("piecefield_obj", [BigfilePiecefield, BigfilePiecefieldPacked])
|
@pytest.mark.parametrize("piecefield_obj", [BigfilePiecefield, BigfilePiecefieldPacked])
|
||||||
def testPiecefield(self, piecefield_obj, site):
|
def testPiecefield(self, piecefield_obj, site):
|
||||||
testdatas = [
|
testdatas = [
|
||||||
"1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1",
|
b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01",
|
||||||
"010101" * 10 + "01" * 90 + "10" * 400 + "0" * 4999,
|
b"\x00\x01\x00\x01\x00\x01" * 10 + b"\x00\x01" * 90 + b"\x01\x00" * 400 + b"\x00" * 4999,
|
||||||
"1" * 10000,
|
b"\x01" * 10000,
|
||||||
"0" * 10000
|
b"\x00" * 10000
|
||||||
]
|
]
|
||||||
for testdata in testdatas:
|
for testdata in testdatas:
|
||||||
piecefield = piecefield_obj()
|
piecefield = piecefield_obj()
|
||||||
|
|
||||||
piecefield.fromstring(testdata)
|
piecefield.frombytes(testdata)
|
||||||
assert piecefield.tostring() == testdata
|
assert piecefield.tobytes() == testdata
|
||||||
assert piecefield[0] == int(testdata[0])
|
assert piecefield[0] == testdata[0]
|
||||||
assert piecefield[100] == int(testdata[100])
|
assert piecefield[100] == testdata[100]
|
||||||
assert piecefield[1000] == int(testdata[1000])
|
assert piecefield[1000] == testdata[1000]
|
||||||
assert piecefield[len(testdata) - 1] == int(testdata[len(testdata) - 1])
|
assert piecefield[len(testdata) - 1] == testdata[len(testdata) - 1]
|
||||||
|
|
||||||
packed = piecefield.pack()
|
packed = piecefield.pack()
|
||||||
piecefield_new = piecefield_obj()
|
piecefield_new = piecefield_obj()
|
||||||
piecefield_new.unpack(packed)
|
piecefield_new.unpack(packed)
|
||||||
assert piecefield.tostring() == piecefield_new.tostring()
|
assert piecefield.tobytes() == piecefield_new.tobytes()
|
||||||
assert piecefield_new.tostring() == testdata
|
assert piecefield_new.tobytes() == testdata
|
||||||
|
|
||||||
def testFileGet(self, file_server, site, site_temp):
|
def testFileGet(self, file_server, site, site_temp):
|
||||||
inner_path = self.createBigfile(site)
|
inner_path = self.createBigfile(site)
|
||||||
|
@ -345,7 +345,7 @@ class TestBigfile:
|
||||||
# Create 10 fake peer for each piece
|
# Create 10 fake peer for each piece
|
||||||
for i in range(10):
|
for i in range(10):
|
||||||
peer = Peer(file_server.ip, 1544, site_temp, server2)
|
peer = Peer(file_server.ip, 1544, site_temp, server2)
|
||||||
peer.piecefields[sha512][i] = "1"
|
peer.piecefields[sha512][i] = b"\x01"
|
||||||
peer.updateHashfield = mock.MagicMock(return_value=False)
|
peer.updateHashfield = mock.MagicMock(return_value=False)
|
||||||
peer.updatePiecefields = mock.MagicMock(return_value=False)
|
peer.updatePiecefields = mock.MagicMock(return_value=False)
|
||||||
peer.findHashIds = mock.MagicMock(return_value={"nope": []})
|
peer.findHashIds = mock.MagicMock(return_value={"nope": []})
|
||||||
|
@ -430,7 +430,7 @@ class TestBigfile:
|
||||||
time.sleep(0.5) # Wait prebuffer download
|
time.sleep(0.5) # Wait prebuffer download
|
||||||
|
|
||||||
sha512 = site.content_manager.getFileInfo(inner_path)["sha512"]
|
sha512 = site.content_manager.getFileInfo(inner_path)["sha512"]
|
||||||
assert site_temp.storage.piecefields[sha512].tostring() == "0000011100"
|
assert site_temp.storage.piecefields[sha512].tobytes() == b"\x00\x00\x00\x00\x00\x01\x01\x01\x00\x00"
|
||||||
|
|
||||||
# No prebuffer beyond end of the file
|
# No prebuffer beyond end of the file
|
||||||
f.seek(9 * 1024 * 1024)
|
f.seek(9 * 1024 * 1024)
|
||||||
|
|
|
@ -60,13 +60,13 @@ class UiWebsocketPlugin(object):
|
||||||
bigfile_sha512_cache[file_key] = sha512
|
bigfile_sha512_cache[file_key] = sha512
|
||||||
|
|
||||||
if sha512 in site.storage.piecefields:
|
if sha512 in site.storage.piecefields:
|
||||||
piecefield = site.storage.piecefields[sha512].tostring()
|
piecefield = site.storage.piecefields[sha512].tobytes()
|
||||||
else:
|
else:
|
||||||
piecefield = None
|
piecefield = None
|
||||||
|
|
||||||
if piecefield:
|
if piecefield:
|
||||||
row["pieces"] = len(piecefield)
|
row["pieces"] = len(piecefield)
|
||||||
row["pieces_downloaded"] = piecefield.count("1")
|
row["pieces_downloaded"] = piecefield.count(b"\x01")
|
||||||
row["downloaded_percent"] = 100 * row["pieces_downloaded"] / row["pieces"]
|
row["downloaded_percent"] = 100 * row["pieces_downloaded"] / row["pieces"]
|
||||||
if row["pieces_downloaded"]:
|
if row["pieces_downloaded"]:
|
||||||
if row["pieces"] == row["pieces_downloaded"]:
|
if row["pieces"] == row["pieces_downloaded"]:
|
||||||
|
@ -86,10 +86,10 @@ class UiWebsocketPlugin(object):
|
||||||
for peer in site.peers.values():
|
for peer in site.peers.values():
|
||||||
if not peer.time_piecefields_updated or sha512 not in peer.piecefields:
|
if not peer.time_piecefields_updated or sha512 not in peer.piecefields:
|
||||||
continue
|
continue
|
||||||
peer_piecefield = peer.piecefields[sha512].tostring()
|
peer_piecefield = peer.piecefields[sha512].tobytes()
|
||||||
if not peer_piecefield:
|
if not peer_piecefield:
|
||||||
continue
|
continue
|
||||||
if peer_piecefield == "1" * len(peer_piecefield):
|
if peer_piecefield == b"\x01" * len(peer_piecefield):
|
||||||
row["peer_seed"] += 1
|
row["peer_seed"] += 1
|
||||||
else:
|
else:
|
||||||
row["peer_leech"] += 1
|
row["peer_leech"] += 1
|
||||||
|
|
Loading…
Reference in a new issue