From ec6fd48b86ae68d3a28d24c7856bd92e4f7a1a7f Mon Sep 17 00:00:00 2001
From: radfish <redfish@galactica.pw>
Date: Tue, 16 Apr 2019 09:14:19 -0400
Subject: [PATCH] Bigfile: fix piece field bitmask to be used as bytearray
 consistently (#1982)

* Bigfile: make Piecefield array a bytearray

We want an array of characters. Py2 strings made sense to
use as an array of characters, but Py3 strings are different
and no longer a good choice.

* Bigfile: store bits as binary instead of char

* BigFile: rename to/from string -> to/from bytes

Since the type was changed to bytearray.
---
 plugins/Bigfile/BigfilePiecefield.py         | 74 +++++++++++---------
 plugins/Bigfile/BigfilePlugin.py             | 15 ++--
 plugins/Bigfile/Test/TestBigfile.py          | 34 ++++-----
 plugins/OptionalManager/UiWebsocketPlugin.py |  8 +--
 4 files changed, 69 insertions(+), 62 deletions(-)

diff --git a/plugins/Bigfile/BigfilePiecefield.py b/plugins/Bigfile/BigfilePiecefield.py
index b0bcd96e..5fbcf96b 100644
--- a/plugins/Bigfile/BigfilePiecefield.py
+++ b/plugins/Bigfile/BigfilePiecefield.py
@@ -2,23 +2,24 @@ import array
 
 
 def packPiecefield(data):
+    assert isinstance(data, bytes) or isinstance(data, bytearray)
     res = []
     if not data:
         return array.array("H", b"")
 
-    if data[0] == "0":
+    if data[0] == 0:
         res.append(0)
-        find = "1"
+        find = b"\x01"
     else:
-        find = "0"
+        find = b"\x00"
     last_pos = 0
     pos = 0
     while 1:
         pos = data.find(find, pos)
-        if find == "0":
-            find = "1"
+        if find == b"\x00":
+            find = b"\x01"
         else:
-            find = "0"
+            find = b"\x00"
         if pos == -1:
             res.append(len(data) - last_pos)
             break
@@ -29,31 +30,43 @@ def packPiecefield(data):
 
 def unpackPiecefield(data):
     if not data:
-        return ""
+        return b""
 
     res = []
-    char = "1"
+    char = b"\x01"
     for times in data:
         if times > 10000:
-            return ""
+            return b""
         res.append(char * times)
-        if char == "1":
-            char = "0"
+        if char == b"\x01":
+            char = b"\x00"
         else:
-            char = "1"
-    return "".join(res)
+            char = b"\x01"
+    return b"".join(res)
 
 
-class BigfilePiecefield(object):
+def spliceBit(data, idx, bit):
+    assert bit == b"\x00" or bit == b"\x01"
+    if len(data) < idx:
+        data = data.ljust(idx + 1, b"\x00")
+    return data[:idx] + bit + data[idx + 1:]
+
+class Piecefield(object):
+    def tostring(self):
+        return "".join(["1" if b else "0" for b in self.tobytes()])
+
+
+class BigfilePiecefield(Piecefield):
     __slots__ = ["data"]
 
     def __init__(self):
         self.data = b""
 
-    def fromstring(self, s):
+    def frombytes(self, s):
+        assert isinstance(s, bytes) or isinstance(s, bytearray)
         self.data = s
 
-    def tostring(self):
+    def tobytes(self):
         return self.data
 
     def pack(self):
@@ -64,28 +77,24 @@ class BigfilePiecefield(object):
 
     def __getitem__(self, key):
         try:
-            return int(self.data[key])
+            return self.data[key]
         except IndexError:
             return False
 
     def __setitem__(self, key, value):
-        data = self.data
-        if len(data) < key:
-            data = data.ljust(key + 1, "0")
-        data = data[:key] + str(int(value)) + data[key + 1:]
-        self.data = data
+        self.data = spliceBit(self.data, key, value)
 
-
-class BigfilePiecefieldPacked(object):
+class BigfilePiecefieldPacked(Piecefield):
     __slots__ = ["data"]
 
     def __init__(self):
         self.data = b""
 
-    def fromstring(self, data):
+    def frombytes(self, data):
+        assert isinstance(data, bytes) or isinstance(data, bytearray)
         self.data = packPiecefield(data).tobytes()
 
-    def tostring(self):
+    def tobytes(self):
         return unpackPiecefield(array.array("H", self.data))
 
     def pack(self):
@@ -96,23 +105,20 @@ class BigfilePiecefieldPacked(object):
 
     def __getitem__(self, key):
         try:
-            return int(self.tostring()[key])
+            return self.tobytes()[key]
         except IndexError:
             return False
 
     def __setitem__(self, key, value):
-        data = self.tostring()
-        if len(data) < key:
-            data = data.ljust(key + 1, "0")
-        data = data[:key] + str(int(value)) + data[key + 1:]
-        self.fromstring(data)
+        data = spliceBit(self.tobytes(), key, value)
+        self.frombytes(data)
 
 
 if __name__ == "__main__":
     import os
     import psutil
     import time
-    testdata = "1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1"
+    testdata = b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01"
     meminfo = psutil.Process(os.getpid()).memory_info
 
     for storage in [BigfilePiecefieldPacked, BigfilePiecefield]:
@@ -122,7 +128,7 @@ if __name__ == "__main__":
         piecefields = {}
         for i in range(10000):
             piecefield = storage()
-            piecefield.fromstring(testdata[:i] + "0" + testdata[i + 1:])
+            piecefield.frombytes(testdata[:i] + b"\x00" + testdata[i + 1:])
             piecefields[i] = piecefield
 
         print("Create x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - m) / 1024, time.time() - s, len(piecefields[0].data)))
diff --git a/plugins/Bigfile/BigfilePlugin.py b/plugins/Bigfile/BigfilePlugin.py
index 3e81eee5..31e7ae9f 100644
--- a/plugins/Bigfile/BigfilePlugin.py
+++ b/plugins/Bigfile/BigfilePlugin.py
@@ -324,7 +324,7 @@ class ContentManagerPlugin(object):
         # Add the merkle root to hashfield
         hash_id = self.site.content_manager.hashfield.getHashId(hash)
         self.optionalDownloaded(inner_path, hash_id, file_size, own=True)
-        self.site.storage.piecefields[hash].fromstring("1" * piece_num)
+        self.site.storage.piecefields[hash].frombytes(b"\x01" * piece_num)
 
         back[file_relative_path] = {"sha512": hash, "size": file_size, "piecemap": piecemap_relative_path, "piece_size": piece_size}
         return back
@@ -361,7 +361,7 @@ class ContentManagerPlugin(object):
 
             # Mark piece downloaded
             piece_i = int(pos_from / file_info["piece_size"])
-            self.site.storage.piecefields[file_info["sha512"]][piece_i] = True
+            self.site.storage.piecefields[file_info["sha512"]][piece_i] = b"\x01"
 
             # Only add to site size on first request
             if hash_id in self.hashfield:
@@ -460,15 +460,16 @@ class SiteStoragePlugin(object):
         if os.path.isfile(file_path):
             if sha512 not in self.piecefields:
                 if open(file_path, "rb").read(128) == b"\0" * 128:
-                    piece_data = "0"
+                    piece_data = b"\x00"
                 else:
-                    piece_data = "1"
+                    piece_data = b"\x01"
                 self.log.debug("%s: File exists, but not in piecefield. Filling piecefiled with %s * %s." % (inner_path, piece_num, piece_data))
-                self.piecefields[sha512].fromstring(piece_data * piece_num)
+                self.piecefields[sha512].frombytes(piece_data * piece_num)
         else:
             self.log.debug("Creating bigfile: %s" % inner_path)
             self.createSparseFile(inner_path, file_info["size"], sha512)
-            self.piecefields[sha512].fromstring("0" * piece_num)
+            self.piecefields[sha512].frombytes(b"\x00" * piece_num)
+            self.log.debug("Created bigfile: %s" % inner_path)
         return True
 
     def openBigfile(self, inner_path, prebuffer=0):
@@ -595,7 +596,7 @@ class WorkerManagerPlugin(object):
             if not self.site.storage.isFile(inner_path):
                 self.site.storage.createSparseFile(inner_path, file_info["size"], file_info["sha512"])
                 piece_num = int(math.ceil(float(file_info["size"]) / file_info["piece_size"]))
-                self.site.storage.piecefields[file_info["sha512"]].fromstring("0" * piece_num)
+                self.site.storage.piecefields[file_info["sha512"]].frombytes(b"\x00" * piece_num)
         else:
             task = super(WorkerManagerPlugin, self).addTask(inner_path, *args, **kwargs)
         return task
diff --git a/plugins/Bigfile/Test/TestBigfile.py b/plugins/Bigfile/Test/TestBigfile.py
index eabf16af..2180f2bd 100644
--- a/plugins/Bigfile/Test/TestBigfile.py
+++ b/plugins/Bigfile/Test/TestBigfile.py
@@ -137,8 +137,8 @@ class TestBigfile:
         bad_files = site_temp.storage.verifyFiles(quick_check=True)["bad_files"]
         assert not bad_files
 
-        # client_piecefield = peer_client.piecefields[file_info["sha512"]].tostring()
-        # assert client_piecefield == "1" * 10
+        # client_piecefield = peer_client.piecefields[file_info["sha512"]].tobytes()
+        # assert client_piecefield == b"\x01" * 10
 
         # Download 5. and 10. block
 
@@ -187,7 +187,7 @@ class TestBigfile:
 
             assert set(site_temp.content_manager.hashfield) == set([18343, 43727])
 
-            assert site_temp.storage.piecefields[f.sha512].tostring() == "0000010001"
+            assert site_temp.storage.piecefields[f.sha512].tobytes() == b"\x00\x00\x00\x00\x00\x01\x00\x00\x00\x01"
             assert f.sha512 in site_temp.getSettingsCache()["piecefields"]
 
             # Test requesting already downloaded
@@ -219,26 +219,26 @@ class TestBigfile:
     @pytest.mark.parametrize("piecefield_obj", [BigfilePiecefield, BigfilePiecefieldPacked])
     def testPiecefield(self, piecefield_obj, site):
         testdatas = [
-            "1" * 100 + "0" * 900 + "1" * 4000 + "0" * 4999 + "1",
-            "010101" * 10 + "01" * 90 + "10" * 400 + "0" * 4999,
-            "1" * 10000,
-            "0" * 10000
+            b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01",
+            b"\x00\x01\x00\x01\x00\x01" * 10 + b"\x00\x01" * 90 + b"\x01\x00" * 400 + b"\x00" * 4999,
+            b"\x01" * 10000,
+            b"\x00" * 10000
         ]
         for testdata in testdatas:
             piecefield = piecefield_obj()
 
-            piecefield.fromstring(testdata)
-            assert piecefield.tostring() == testdata
-            assert piecefield[0] == int(testdata[0])
-            assert piecefield[100] == int(testdata[100])
-            assert piecefield[1000] == int(testdata[1000])
-            assert piecefield[len(testdata) - 1] == int(testdata[len(testdata) - 1])
+            piecefield.frombytes(testdata)
+            assert piecefield.tobytes() == testdata
+            assert piecefield[0] == testdata[0]
+            assert piecefield[100] == testdata[100]
+            assert piecefield[1000] == testdata[1000]
+            assert piecefield[len(testdata) - 1] == testdata[len(testdata) - 1]
 
             packed = piecefield.pack()
             piecefield_new = piecefield_obj()
             piecefield_new.unpack(packed)
-            assert piecefield.tostring() == piecefield_new.tostring()
-            assert piecefield_new.tostring() == testdata
+            assert piecefield.tobytes() == piecefield_new.tobytes()
+            assert piecefield_new.tobytes() == testdata
 
     def testFileGet(self, file_server, site, site_temp):
         inner_path = self.createBigfile(site)
@@ -345,7 +345,7 @@ class TestBigfile:
         # Create 10 fake peer for each piece
         for i in range(10):
             peer = Peer(file_server.ip, 1544, site_temp, server2)
-            peer.piecefields[sha512][i] = "1"
+            peer.piecefields[sha512][i] = b"\x01"
             peer.updateHashfield = mock.MagicMock(return_value=False)
             peer.updatePiecefields = mock.MagicMock(return_value=False)
             peer.findHashIds = mock.MagicMock(return_value={"nope": []})
@@ -430,7 +430,7 @@ class TestBigfile:
             time.sleep(0.5)  # Wait prebuffer download
 
             sha512 = site.content_manager.getFileInfo(inner_path)["sha512"]
-            assert site_temp.storage.piecefields[sha512].tostring() == "0000011100"
+            assert site_temp.storage.piecefields[sha512].tobytes() == b"\x00\x00\x00\x00\x00\x01\x01\x01\x00\x00"
 
             # No prebuffer beyond end of the file
             f.seek(9 * 1024 * 1024)
diff --git a/plugins/OptionalManager/UiWebsocketPlugin.py b/plugins/OptionalManager/UiWebsocketPlugin.py
index cf35bc8c..52b0f749 100644
--- a/plugins/OptionalManager/UiWebsocketPlugin.py
+++ b/plugins/OptionalManager/UiWebsocketPlugin.py
@@ -60,13 +60,13 @@ class UiWebsocketPlugin(object):
             bigfile_sha512_cache[file_key] = sha512
 
         if sha512 in site.storage.piecefields:
-            piecefield = site.storage.piecefields[sha512].tostring()
+            piecefield = site.storage.piecefields[sha512].tobytes()
         else:
             piecefield = None
 
         if piecefield:
             row["pieces"] = len(piecefield)
-            row["pieces_downloaded"] = piecefield.count("1")
+            row["pieces_downloaded"] = piecefield.count(b"\x01")
             row["downloaded_percent"] = 100 * row["pieces_downloaded"] / row["pieces"]
             if row["pieces_downloaded"]:
                 if row["pieces"] == row["pieces_downloaded"]:
@@ -86,10 +86,10 @@ class UiWebsocketPlugin(object):
         for peer in site.peers.values():
             if not peer.time_piecefields_updated or sha512 not in peer.piecefields:
                 continue
-            peer_piecefield = peer.piecefields[sha512].tostring()
+            peer_piecefield = peer.piecefields[sha512].tobytes()
             if not peer_piecefield:
                 continue
-            if peer_piecefield == "1" * len(peer_piecefield):
+            if peer_piecefield == b"\x01" * len(peer_piecefield):
                 row["peer_seed"] += 1
             else:
                 row["peer_leech"] += 1