Bigfile: fix piece field bitmask to be used as bytearray consistently (#1982)

* Bigfile: make Piecefield array a bytearray

We want an array of characters. Py2 strings made sense to
use as an array of characters, but Py3 strings are different
and no longer a good choice.

* Bigfile: store bits as binary instead of char

* BigFile: rename to/from string -> to/from bytes

Since the type was changed to bytearray.
radfish 2019-04-16 09:14:19 -04:00 committed by ZeroNet
parent 1516d55a88
commit ec6fd48b86
4 changed files with 69 additions and 62 deletions
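For context on the first bullet: indexing a Python 3 str still yields a 1-character str, but indexing bytes yields an int, so Py2-style code that compared piecefield characters against "0"/"1" stops matching once the data becomes binary. A minimal illustration (plain Python, not taken from the diff):

    data = b"\x01\x00\x01"
    assert data[0] == 1           # bytes indexing returns an int...
    assert data[0:1] == b"\x01"   # ...while slicing keeps the bytes type
    assert "101"[0] == "1"        # str indexing returns a 1-char str
    # Storing one \x00/\x01 byte per piece keeps comparisons consistent
    # and works the same for bytes and bytearray.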

View file

@@ -2,23 +2,24 @@ import array
 def packPiecefield(data):
+    assert isinstance(data, bytes) or isinstance(data, bytearray)
     res = []
     if not data:
         return array.array("H", b"")
-    if data[0] == "0":
+    if data[0] == b"\x00":
         res.append(0)
-        find = "1"
+        find = b"\x01"
     else:
-        find = "0"
+        find = b"\x00"
     last_pos = 0
     pos = 0
     while 1:
         pos = data.find(find, pos)
-        if find == "0":
-            find = "1"
+        if find == b"\x00":
+            find = b"\x01"
         else:
-            find = "0"
+            find = b"\x00"
         if pos == -1:
             res.append(len(data) - last_pos)
             break
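The hunk above is the run-length encoder: it scans for the next byte that differs from the current run and records each run's length in an array of unsigned 16-bit integers ("H"), with lengths alternating starting from a \x01 run (a zero-length first run is recorded when the field starts with \x00). A rough round-trip sketch, assuming the full packPiecefield/unpackPiecefield bodies (the middle of the loop falls outside this hunk):

    field = b"\x01" * 3 + b"\x00" * 5 + b"\x01" * 2
    packed = packPiecefield(field)       # array('H', [3, 5, 2])
    assert unpackPiecefield(packed) == field
    packPiecefield(b"\x00\x00\x01")      # array('H', [0, 2, 1]): leading \x00 run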
@@ -29,31 +30,43 @@ def packPiecefield(data):
 def unpackPiecefield(data):
     if not data:
-        return ""
+        return b""
     res = []
-    char = "1"
+    char = b"\x01"
     for times in data:
         if times > 10000:
-            return ""
+            return b""
         res.append(char * times)
-        if char == "1":
-            char = "0"
+        if char == b"\x01":
+            char = b"\x00"
         else:
-            char = "1"
-    return "".join(res)
+            char = b"\x01"
+    return b"".join(res)
-class BigfilePiecefield(object):
+def spliceBit(data, idx, bit):
+    assert bit == b"\x00" or bit == b"\x01"
+    if len(data) < idx:
+        data = data.ljust(idx + 1, b"\x00")
+    return data[:idx] + bit + data[idx + 1:]
+class Piecefield(object):
+    def tostring(self):
+        return "".join(["1" if b else "0" for b in self.tobytes()])
+class BigfilePiecefield(Piecefield):
     __slots__ = ["data"]
     def __init__(self):
         self.data = b""
-    def fromstring(self, s):
+    def frombytes(self, s):
+        assert isinstance(s, bytes) or isinstance(s, bytearray)
         self.data = s
-    def tostring(self):
+    def tobytes(self):
         return self.data
     def pack(self):
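The new spliceBit helper centralizes the single-bit write that both __setitem__ implementations previously open-coded with str slicing, zero-padding the field when the index lies past the end; the Piecefield base class keeps tostring only as a human-readable "1"/"0" rendering of the binary data. A quick sketch of the helper's behavior, using the definitions above:

    assert spliceBit(b"\x01\x00", 1, b"\x01") == b"\x01\x01"
    assert spliceBit(b"", 3, b"\x01") == b"\x00\x00\x00\x01"  # gap padded with \x00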
@@ -64,28 +77,24 @@ class BigfilePiecefield(object):
     def __getitem__(self, key):
         try:
-            return int(self.data[key])
+            return self.data[key]
         except IndexError:
             return False
     def __setitem__(self, key, value):
-        data = self.data
-        if len(data) < key:
-            data = data.ljust(key + 1, "0")
-        data = data[:key] + str(int(value)) + data[key + 1:]
-        self.data = data
+        self.data = spliceBit(self.data, key, value)
-class BigfilePiecefieldPacked(object):
+class BigfilePiecefieldPacked(Piecefield):
     __slots__ = ["data"]
     def __init__(self):
         self.data = b""
-    def fromstring(self, data):
+    def frombytes(self, data):
         assert isinstance(data, bytes) or isinstance(data, bytearray)
         self.data = packPiecefield(data).tobytes()
-    def tostring(self):
+    def tobytes(self):
         return unpackPiecefield(array.array("H", self.data))
     def pack(self):
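Behind the shared interface the two classes trade memory for speed: BigfilePiecefield keeps the expanded one-byte-per-piece form (fast indexing), while BigfilePiecefieldPacked stores the run-length packed bytes and re-expands them on every read. A sketch using the definitions above:

    pf = BigfilePiecefield()
    pf.frombytes(b"\x00" * 5)
    pf[2] = b"\x01"                       # routed through spliceBit
    assert pf.tobytes() == b"\x00\x00\x01\x00\x00"
    assert pf.tostring() == "00100"       # debug rendering from the base class

    pfp = BigfilePiecefieldPacked()
    pfp.frombytes(pf.tobytes())           # stored packed as run lengths
    assert pfp[2] == 1                    # re-expanded on access; int, not bytes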
@@ -96,23 +105,20 @@ class BigfilePiecefieldPacked(object):
     def __getitem__(self, key):
         try:
-            return int(self.tostring()[key])
+            return self.tobytes()[key]
         except IndexError:
             return False
     def __setitem__(self, key, value):
-        data = self.tostring()
-        if len(data) < key:
-            data = data.ljust(key + 1, "0")
-        data = data[:key] + str(int(value)) + data[key + 1:]
-        self.fromstring(data)
+        data = spliceBit(self.tobytes(), key, value)
+        self.frombytes(data)
 if __name__ == "__main__":
     import os
     import psutil
     import time
-    testdata = "1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1"
+    testdata = b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01"
     meminfo = psutil.Process(os.getpid()).memory_info
     for storage in [BigfilePiecefieldPacked, BigfilePiecefield]:
@@ -122,7 +128,7 @@ if __name__ == "__main__":
         piecefields = {}
         for i in range(10000):
             piecefield = storage()
-            piecefield.fromstring(testdata[:i] + "0" + testdata[i + 1:])
+            piecefield.frombytes(testdata[:i] + b"\x00" + testdata[i + 1:])
             piecefields[i] = piecefield
         print("Create x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - m) / 1024, time.time() - s, len(piecefields[0].data)))

View file

@@ -324,7 +324,7 @@ class ContentManagerPlugin(object):
         # Add the merkle root to hashfield
         hash_id = self.site.content_manager.hashfield.getHashId(hash)
         self.optionalDownloaded(inner_path, hash_id, file_size, own=True)
-        self.site.storage.piecefields[hash].fromstring("1" * piece_num)
+        self.site.storage.piecefields[hash].frombytes(b"\x01" * piece_num)
         back[file_relative_path] = {"sha512": hash, "size": file_size, "piecemap": piecemap_relative_path, "piece_size": piece_size}
         return back
@@ -361,7 +361,7 @@ class ContentManagerPlugin(object):
         # Mark piece downloaded
         piece_i = int(pos_from / file_info["piece_size"])
-        self.site.storage.piecefields[file_info["sha512"]][piece_i] = True
+        self.site.storage.piecefields[file_info["sha512"]][piece_i] = b"\x01"
         # Only add to site size on first request
         if hash_id in self.hashfield:
@@ -460,15 +460,16 @@ class SiteStoragePlugin(object):
         if os.path.isfile(file_path):
             if sha512 not in self.piecefields:
                 if open(file_path, "rb").read(128) == b"\0" * 128:
-                    piece_data = "0"
+                    piece_data = b"\x00"
                 else:
-                    piece_data = "1"
+                    piece_data = b"\x01"
                 self.log.debug("%s: File exists, but not in piecefield. Filling piecefiled with %s * %s." % (inner_path, piece_num, piece_data))
-                self.piecefields[sha512].fromstring(piece_data * piece_num)
+                self.piecefields[sha512].frombytes(piece_data * piece_num)
         else:
             self.log.debug("Creating bigfile: %s" % inner_path)
             self.createSparseFile(inner_path, file_info["size"], sha512)
-            self.piecefields[sha512].fromstring("0" * piece_num)
+            self.piecefields[sha512].frombytes(b"\x00" * piece_num)
             self.log.debug("Created bigfile: %s" % inner_path)
         return True
     def openBigfile(self, inner_path, prebuffer=0):
@@ -595,7 +596,7 @@ class WorkerManagerPlugin(object):
             if not self.site.storage.isFile(inner_path):
                 self.site.storage.createSparseFile(inner_path, file_info["size"], file_info["sha512"])
                 piece_num = int(math.ceil(float(file_info["size"]) / file_info["piece_size"]))
-                self.site.storage.piecefields[file_info["sha512"]].fromstring("0" * piece_num)
+                self.site.storage.piecefields[file_info["sha512"]].frombytes(b"\x00" * piece_num)
         else:
             task = super(WorkerManagerPlugin, self).addTask(inner_path, *args, **kwargs)
         return task
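The plugin-side changes are mechanical: every call site that built a piecefield from "0"/"1" characters now passes \x00/\x01 bytes, and single-piece writes pass b"\x01" instead of True. The piece count is the file size rounded up to whole pieces, as in the hunk above; a worked example (the 1 MiB piece size is illustrative, not taken from this diff):

    import math
    file_size = 10 * 1024 * 1024 + 1      # 10 MiB plus one byte
    piece_size = 1024 * 1024
    piece_num = int(math.ceil(float(file_size) / piece_size))  # -> 11
    empty_field = b"\x00" * piece_num     # nothing downloaded yet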

View file

@@ -137,8 +137,8 @@ class TestBigfile:
         bad_files = site_temp.storage.verifyFiles(quick_check=True)["bad_files"]
         assert not bad_files
-        # client_piecefield = peer_client.piecefields[file_info["sha512"]].tostring()
-        # assert client_piecefield == "1" * 10
+        # client_piecefield = peer_client.piecefields[file_info["sha512"]].tobytes()
+        # assert client_piecefield == b"\x01" * 10
         # Download 5. and 10. block
@@ -187,7 +187,7 @@ class TestBigfile:
         assert set(site_temp.content_manager.hashfield) == set([18343, 43727])
-        assert site_temp.storage.piecefields[f.sha512].tostring() == "0000010001"
+        assert site_temp.storage.piecefields[f.sha512].tobytes() == b"\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01"
         assert f.sha512 in site_temp.getSettingsCache()["piecefields"]
         # Test requesting already downloaded
@@ -219,26 +219,26 @@ class TestBigfile:
     @pytest.mark.parametrize("piecefield_obj", [BigfilePiecefield, BigfilePiecefieldPacked])
     def testPiecefield(self, piecefield_obj, site):
         testdatas = [
-            "1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1",
-            "010101" * 10 + "01" * 90 + "10" * 400 + "0" * 4999,
-            "1" * 10000,
-            "0" * 10000
+            b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01",
+            b"\x00\x01\x00\x01\x00\x01" * 10 + b"\x00\x01" * 90 + b"\x01\x00" * 400 + b"\x00" * 4999,
+            b"\x01" * 10000,
+            b"\x00" * 10000
         ]
         for testdata in testdatas:
             piecefield = piecefield_obj()
-            piecefield.fromstring(testdata)
-            assert piecefield.tostring() == testdata
-            assert piecefield[0] == int(testdata[0])
-            assert piecefield[100] == int(testdata[100])
-            assert piecefield[1000] == int(testdata[1000])
-            assert piecefield[len(testdata) - 1] == int(testdata[len(testdata) - 1])
+            piecefield.frombytes(testdata)
+            assert piecefield.tobytes() == testdata
+            assert piecefield[0] == testdata[0]
+            assert piecefield[100] == testdata[100]
+            assert piecefield[1000] == testdata[1000]
+            assert piecefield[len(testdata) - 1] == testdata[len(testdata) - 1]
             packed = piecefield.pack()
             piecefield_new = piecefield_obj()
             piecefield_new.unpack(packed)
-            assert piecefield.tostring() == piecefield_new.tostring()
-            assert piecefield_new.tostring() == testdata
+            assert piecefield.tobytes() == piecefield_new.tobytes()
+            assert piecefield_new.tobytes() == testdata
     def testFileGet(self, file_server, site, site_temp):
         inner_path = self.createBigfile(site)
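The dropped int() casts in testPiecefield follow directly from the type change: indexing bytes already yields an int, so both sides of each assertion are plain integers:

    testdata = b"\x01" * 100 + b"\x00" * 900
    assert testdata[0] == 1
    assert testdata[100] == 0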
@@ -345,7 +345,7 @@ class TestBigfile:
         # Create 10 fake peer for each piece
         for i in range(10):
             peer = Peer(file_server.ip, 1544, site_temp, server2)
-            peer.piecefields[sha512][i] = "1"
+            peer.piecefields[sha512][i] = b"\x01"
             peer.updateHashfield = mock.MagicMock(return_value=False)
             peer.updatePiecefields = mock.MagicMock(return_value=False)
             peer.findHashIds = mock.MagicMock(return_value={"nope": []})
@@ -430,7 +430,7 @@ class TestBigfile:
         time.sleep(0.5)  # Wait prebuffer download
         sha512 = site.content_manager.getFileInfo(inner_path)["sha512"]
-        assert site_temp.storage.piecefields[sha512].tostring() == "0000011100"
+        assert site_temp.storage.piecefields[sha512].tobytes() == b"\x00\x00\x00\x00\x00\x01\x01\x01\x00\x00"
         # No prebuffer beyond end of the file
         f.seek(9 * 1024 * 1024)

View file

@@ -60,13 +60,13 @@ class UiWebsocketPlugin(object):
             bigfile_sha512_cache[file_key] = sha512
         if sha512 in site.storage.piecefields:
-            piecefield = site.storage.piecefields[sha512].tostring()
+            piecefield = site.storage.piecefields[sha512].tobytes()
         else:
             piecefield = None
         if piecefield:
             row["pieces"] = len(piecefield)
-            row["pieces_downloaded"] = piecefield.count("1")
+            row["pieces_downloaded"] = piecefield.count(b"\x01")
             row["downloaded_percent"] = 100 * row["pieces_downloaded"] / row["pieces"]
             if row["pieces_downloaded"]:
                 if row["pieces"] == row["pieces_downloaded"]:
@@ -86,10 +86,10 @@ class UiWebsocketPlugin(object):
         for peer in site.peers.values():
             if not peer.time_piecefields_updated or sha512 not in peer.piecefields:
                 continue
-            peer_piecefield = peer.piecefields[sha512].tostring()
+            peer_piecefield = peer.piecefields[sha512].tobytes()
             if not peer_piecefield:
                 continue
-            if peer_piecefield == "1" * len(peer_piecefield):
+            if peer_piecefield == b"\x01" * len(peer_piecefield):
                 row["peer_seed"] += 1
             else:
                 row["peer_leech"] += 1