diff --git a/src/Config.py b/src/Config.py index 6bd44974..4fd29a02 100644 --- a/src/Config.py +++ b/src/Config.py @@ -8,7 +8,7 @@ class Config(object): def __init__(self, argv): self.version = "0.3.2" - self.rev = 467 + self.rev = 470 self.argv = argv self.action = None self.createParser() diff --git a/src/Content/ContentManager.py b/src/Content/ContentManager.py index 094a11b2..abcec3ce 100644 --- a/src/Content/ContentManager.py +++ b/src/Content/ContentManager.py @@ -10,6 +10,8 @@ from Debug import Debug from Crypt import CryptHash from Config import config from util import helper +from Peer import PeerHashfield + class ContentManager(object): @@ -17,6 +19,8 @@ class ContentManager(object): self.site = site self.log = self.site.log self.contents = {} # Known content.json (without files and includes) + self.hashfield = PeerHashfield() + self.site.onFileDone.append(lambda inner_path: self.addOptionalFile(inner_path)) self.loadContent(add_bad_files=False, delete_removed_files=False) self.site.settings["size"] = self.getTotalSize() @@ -302,7 +306,9 @@ class ContentManager(object): self.log.info("Opening site data directory: %s..." % directory) changed_files = [inner_path] - files_node, files_optional_node = self.hashFiles(helper.getDirname(inner_path), content.get("ignore"), content.get("optional")) + files_node, files_optional_node = self.hashFiles( + helper.getDirname(inner_path), content.get("ignore"), content.get("optional") + ) # Find changed files files_merged = files_node.copy() @@ -459,7 +465,9 @@ class ContentManager(object): if rules.get("max_size_optional") is not None: # Include optional files limit if content_size_optional > rules["max_size_optional"]: - self.log.error("%s: Include optional files too large %s > %s" % (inner_path, content_size_optional, rules["max_size_optional"])) + self.log.error("%s: Include optional files too large %s > %s" % ( + inner_path, content_size_optional, rules["max_size_optional"]) + ) return False # Filename limit @@ -567,6 +575,12 @@ class ContentManager(object): self.log.error("File not in content.json: %s" % inner_path) return False + def addOptionalFile(self, inner_path): + info = self.getFileInfo(inner_path) + if info and info["optional"]: + self.log.debug("Downloaded optional file, adding to hashfield: %s" % inner_path) + self.hashfield.appendHash(info["sha512"]) + if __name__ == "__main__": def testSign(): diff --git a/src/File/FileRequest.py b/src/File/FileRequest.py index 71586aa1..23d90334 100644 --- a/src/File/FileRequest.py +++ b/src/File/FileRequest.py @@ -65,6 +65,8 @@ class FileRequest(object): self.actionPex(params) elif cmd == "listModified": self.actionListModified(params) + elif cmd == "getHashfield": + self.actionGetHashfield(params) elif cmd == "ping": self.actionPing() else: @@ -257,6 +259,20 @@ class FileRequest(object): self.response({"modified_files": modified_files}) + def actionGetHashfield(self, params): + site = self.sites.get(params["site"]) + if not site or not site.settings["serving"]: # Site unknown or not serving + self.response({"error": "Unknown site"}) + return False + + # Add peer to site if not added before + connected_peer = site.addPeer(self.connection.ip, self.connection.port) + if connected_peer: # Just added + connected_peer.connect(self.connection) # Assign current connection to peer + + self.response({"hashfield_raw": site.content_manager.hashfield.tostring()}) + + # Send a simple Pong! answer def actionPing(self): self.response("Pong!") diff --git a/src/Peer/Peer.py b/src/Peer/Peer.py index af3ca228..a0f65005 100644 --- a/src/Peer/Peer.py +++ b/src/Peer/Peer.py @@ -8,6 +8,7 @@ from cStringIO import StringIO from Debug import Debug from Config import config from util import helper +from PeerHashfield import PeerHashfield if config.use_tempfiles: import tempfile @@ -27,8 +28,8 @@ class Peer(object): self.key = "%s:%s" % (ip, port) self.connection = None - self.hashfield = array.array("H") # Got optional files hash_id - self.last_hashfield = None # Last time hashfiled downloaded + self.hashfield = PeerHashfield() # Got optional files hash_id + self.last_hashfield = 0 # Last time hashfiled downloaded self.last_found = time.time() # Time of last found in the torrent tracker self.last_response = None # Time of last successful response from peer self.last_ping = None # Last response time for ping @@ -221,6 +222,16 @@ class Peer(object): self.log("Added peers using pex: %s" % added) return added + # Request optional files hashfield from peer + def getHashfield(self): + self.last_hashfield = time.time() + res = self.request("getHashfield", {"site": self.site.address}) + if res and "error" not in res: + self.hashfield.replaceFromString(res["hashfield_raw"]) + return self.hashfield + else: + return False + # List modified files since the date # Return: {inner_path: modification date,...} def listModified(self, since): diff --git a/src/Peer/PeerHashfield.py b/src/Peer/PeerHashfield.py new file mode 100644 index 00000000..593932d3 --- /dev/null +++ b/src/Peer/PeerHashfield.py @@ -0,0 +1,42 @@ +import array + + +class PeerHashfield(): + def __init__(self): + self.storage = self.createStoreage() + + def createStoreage(self): + storage = array.array("H") + self.append = storage.append + self.remove = storage.remove + self.tostring = storage.tostring + self.fromstring = storage.fromstring + self.__len__ = storage.__len__ + self.__iter__ = storage.__iter__ + return storage + + def appendHash(self, hash): + hash_id = int(hash[0:4], 16) + if hash_id not in self: + self.append(hash_id) + return True + else: + return False + + def removeHash(self, hash): + hash_id = int(hash[0:4], 16) + if hash_id in self: + self.remove(hash_id) + return True + else: + return False + + def getHashId(self, hash): + return int(hash[0:4], 16) + + def hasHash(self, hash): + return int(hash[0:4], 16) in self + + def replaceFromString(self, hashfield_raw): + self.storage = self.createStoreage() + self.fromstring(hashfield_raw) \ No newline at end of file diff --git a/src/Peer/__init__.py b/src/Peer/__init__.py index 82110b81..3e92827f 100644 --- a/src/Peer/__init__.py +++ b/src/Peer/__init__.py @@ -1 +1,2 @@ from Peer import Peer +from PeerHashfield import PeerHashfield diff --git a/src/Site/Site.py b/src/Site/Site.py index cdaad9d8..f236421e 100644 --- a/src/Site/Site.py +++ b/src/Site/Site.py @@ -34,6 +34,7 @@ class Site: self.address = re.sub("[^A-Za-z0-9]", "", address) # Make sure its correct address self.address_short = "%s..%s" % (self.address[:6], self.address[-4:]) # Short address for logging self.log = logging.getLogger("Site:%s" % self.address_short) + self.addEventListeners() self.content = None # Load content.json self.peers = {} # Key: ip:port, Value: Peer.Peer @@ -66,9 +67,6 @@ class Site: self.websockets = [] # Active site websocket connections - # Add event listeners - self.addEventListeners() - def __str__(self): return "Site %s" % self.address_short diff --git a/src/Site/SiteStorage.py b/src/Site/SiteStorage.py index 5b87f306..4c78c977 100644 --- a/src/Site/SiteStorage.py +++ b/src/Site/SiteStorage.py @@ -218,6 +218,10 @@ class SiteStorage: def isFile(self, inner_path): return os.path.isfile(self.getPath(inner_path)) + # File or directory exist + def isExists(self, inner_path): + return os.path.exists(self.getPath(inner_path)) + # Dir exist def isDir(self, inner_path): return os.path.isdir(self.getPath(inner_path)) @@ -246,6 +250,7 @@ class SiteStorage: # Verify all files sha512sum using content.json def verifyFiles(self, quick_check=False): # Fast = using file size bad_files = [] + if not self.site.content_manager.contents.get("content.json"): # No content.json, download it first self.site.needFile("content.json", update=True) # Force update to fix corrupt file self.site.content_manager.loadContent() # Reload content.json @@ -253,7 +258,8 @@ class SiteStorage: if not os.path.isfile(self.getPath(content_inner_path)): # Missing content.json file self.log.debug("[MISSING] %s" % content_inner_path) bad_files.append(content_inner_path) - for file_relative_path in content["files"].keys(): + + for file_relative_path in content.get("files", {}).keys(): file_inner_path = helper.getDirname(content_inner_path) + file_relative_path # Relative to site dir file_inner_path = file_inner_path.strip("/") # Strip leading / file_path = self.getPath(file_inner_path) @@ -270,9 +276,34 @@ class SiteStorage: if not ok: self.log.debug("[CHANGED] %s" % file_inner_path) bad_files.append(file_inner_path) + + # Optional files + optional_added = 0 + optional_removed = 0 + for file_relative_path in content.get("files_optional", {}).keys(): + file_inner_path = helper.getDirname(content_inner_path) + file_relative_path # Relative to site dir + file_inner_path = file_inner_path.strip("/") # Strip leading / + file_path = self.getPath(file_inner_path) + if not os.path.isfile(file_path): + self.site.content_manager.hashfield.removeHash(content["files_optional"][file_relative_path]["sha512"]) + continue + + if quick_check: + ok = os.path.getsize(file_path) == content["files_optional"][file_relative_path]["size"] + else: + ok = self.site.content_manager.verifyFile(file_inner_path, open(file_path, "rb")) + + if ok: + self.site.content_manager.hashfield.appendHash(content["files_optional"][file_relative_path]["sha512"]) + optional_added += 1 + else: + self.site.content_manager.hashfield.removeHash(content["files_optional"][file_relative_path]["sha512"]) + optional_removed += 1 + self.log.debug("[OPTIONAL CHANGED] %s" % file_inner_path) + self.log.debug( - "%s verified: %s files, quick_check: %s, bad files: %s" % - (content_inner_path, len(content["files"]), quick_check, bad_files) + "%s verified: %s, quick: %s, bad: %s, optionals: +%s -%s" % + (content_inner_path, len(content["files"]), quick_check, bad_files, optional_added, optional_removed) ) return bad_files diff --git a/src/Test/TestPeer.py b/src/Test/TestPeer.py index 5c9a7aba..ac6521d7 100644 --- a/src/Test/TestPeer.py +++ b/src/Test/TestPeer.py @@ -1,11 +1,9 @@ -import cStringIO as StringIO import pytest -import time -from Connection import ConnectionServer -from Connection import Connection from File import FileServer +from Crypt import CryptHash +from cStringIO import StringIO @pytest.mark.usefixtures("resetSettings") @@ -52,3 +50,47 @@ class TestFileRequest: connection.close() client.stop() + + def testHashfield(self, site): + sample_hash = site.content_manager.contents["content.json"]["files_optional"].values()[0]["sha512"] + site.storage.verifyFiles(quick_check=True) # Find what optional files we have + + # Check if hashfield has any files + assert len(site.content_manager.hashfield) > 0 + + # Check exsist hash + assert site.content_manager.hashfield.getHashId(sample_hash) in site.content_manager.hashfield + + # Add new hash + new_hash = CryptHash.sha512sum(StringIO("hello")) + assert site.content_manager.hashfield.getHashId(new_hash) not in site.content_manager.hashfield + assert site.content_manager.hashfield.appendHash(new_hash) + assert not site.content_manager.hashfield.appendHash(new_hash) # Don't add second time + assert site.content_manager.hashfield.getHashId(new_hash) in site.content_manager.hashfield + + # Remove new hash + assert site.content_manager.hashfield.removeHash(new_hash) + assert site.content_manager.hashfield.getHashId(new_hash) not in site.content_manager.hashfield + + def testHashfieldExchange(self, file_server, site, site_temp): + file_server.ip_incoming = {} # Reset flood protection + file_server.sites[site.address] = site + client = FileServer("127.0.0.1", 1545) + client.sites[site_temp.address] = site_temp + site_temp.connection_server = client + connection = client.getConnection("127.0.0.1", 1544) + site.storage.verifyFiles(quick_check=True) # Find what optional files we have + + # Add file_server as peer to client + peer_file_server = site_temp.addPeer("127.0.0.1", 1544) + + # Check if hashfield has any files + assert len(site.content_manager.hashfield) > 0 + + # Testing hashfield sync + assert len(peer_file_server.hashfield) == 0 + assert peer_file_server.getHashfield() + assert len(peer_file_server.hashfield) > 0 + + connection.close() + client.stop() diff --git a/src/Test/TestSiteDownload.py b/src/Test/TestSiteDownload.py index 86e5f4d8..7630f847 100644 --- a/src/Test/TestSiteDownload.py +++ b/src/Test/TestSiteDownload.py @@ -1,16 +1,15 @@ import pytest import mock -import time from Connection import ConnectionServer from Config import config -from Site import Site from File import FileRequest import Spy + @pytest.mark.usefixtures("resetTempSettings") @pytest.mark.usefixtures("resetSettings") -class TestWorker: +class TestSiteDownload: def testDownload(self, file_server, site, site_temp): client = ConnectionServer("127.0.0.1", 1545) assert site.storage.directory == config.data_dir + "/" + site.address @@ -57,7 +56,13 @@ class TestWorker: # Optional user file assert not site_temp.storage.isFile("data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif") + optional_file_info = site_temp.content_manager.getFileInfo( + "data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif" + ) + assert not site_temp.content_manager.hashfield.hasHash(optional_file_info["sha512"]) + site_temp.needFile("data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif") assert site_temp.storage.isFile("data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif") + assert site_temp.content_manager.hashfield.hasHash(optional_file_info["sha512"]) assert site_temp.storage.deleteFiles()