From 3d558a4edfe527450d99458f10cf7681eb96590f Mon Sep 17 00:00:00 2001 From: HelloZeroNet Date: Sun, 15 Nov 2015 11:13:57 +0100 Subject: [PATCH] Rev597, Don't load content.json if the modified date is the same, Big content.json memory optimizations, PeerHashfield memory optimizations and typo fix, Give up on file after 10 retries, Fix non-ascii install paths, Stop worker after 10 connection errors --- src/Config.py | 2 +- src/Content/ContentManager.py | 13 +++++++++++-- src/Peer/PeerHashfield.py | 9 +++++---- src/Site/Site.py | 6 ++++++ src/Site/SiteStorage.py | 3 ++- src/Worker/Worker.py | 2 +- 6 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/Config.py b/src/Config.py index 043a003c..503108db 100644 --- a/src/Config.py +++ b/src/Config.py @@ -8,7 +8,7 @@ class Config(object): def __init__(self, argv): self.version = "0.3.3" - self.rev = 582 + self.rev = 597 self.argv = argv self.action = None self.createParser() diff --git a/src/Content/ContentManager.py b/src/Content/ContentManager.py index 65e5d1c8..dd310b95 100644 --- a/src/Content/ContentManager.py +++ b/src/Content/ContentManager.py @@ -35,6 +35,15 @@ class ContentManager(object): if os.path.isfile(content_path): try: + # Check if file is newer than what we have + if old_content: + for line in open(content_path): + if '"modified"' in line: + match = re.search("([0-9\.]+),$", line.strip(" \r\n")) + if match and float(match.group(1)) <= old_content.get("modified", 0): + self.log.debug("loadContent same json file, skipping") + return [], [] + new_content = json.load(open(content_path)) except Exception, err: self.log.error("%s load error: %s" % (content_path, Debug.formatException(err))) @@ -48,7 +57,7 @@ class ContentManager(object): changed = [] deleted = [] # Check changed - for relative_path, info in new_content.get("files", {}).items(): + for relative_path, info in new_content.get("files", {}).iteritems(): if "sha512" in info: hash_type = "sha512" else: # Backward compatiblity @@ -63,7 +72,7 @@ 
class ContentManager(object): changed.append(content_inner_dir + relative_path) # Check changed optional files - for relative_path, info in new_content.get("files_optional", {}).items(): + for relative_path, info in new_content.get("files_optional", {}).iteritems(): file_inner_path = content_inner_dir + relative_path new_hash = info["sha512"] if old_content and old_content.get("files_optional", {}).get(relative_path): # We have the file in the old content diff --git a/src/Peer/PeerHashfield.py b/src/Peer/PeerHashfield.py index d7eeddc8..7e4f9184 100644 --- a/src/Peer/PeerHashfield.py +++ b/src/Peer/PeerHashfield.py @@ -2,12 +2,13 @@ import array import time -class PeerHashfield: +class PeerHashfield(object): + __slots__ = ("storage", "time_changed", "append", "remove", "tostring", "fromstring", "__len__", "__iter__") def __init__(self): - self.storage = self.createStoreage() + self.storage = self.createStorage() self.time_changed = time.time() - def createStoreage(self): + def createStorage(self): storage = array.array("H") self.append = storage.append self.remove = storage.remove @@ -50,7 +51,7 @@ class PeerHashfield: return int(hash[0:4], 16) in self.storage def replaceFromString(self, hashfield_raw): - self.storage = self.createStoreage() + self.storage = self.createStorage() self.storage.fromstring(hashfield_raw) self.time_changed = time.time() diff --git a/src/Site/Site.py b/src/Site/Site.py index 8e91ed9c..9b79d68b 100644 --- a/src/Site/Site.py +++ b/src/Site/Site.py @@ -485,8 +485,14 @@ class Site: self.log.debug("No info for %s, waiting for all content.json" % inner_path) success = self.downloadContent("content.json", download_files=False) if not success: + if self.bad_files.get(inner_path, 0) > 10: + del self.bad_files[inner_path] + self.log.debug("Max retry reached, giving up on %s" % inner_path) return False if not self.content_manager.getFileInfo(inner_path): + if self.bad_files.get(inner_path, 0) > 10: + del self.bad_files[inner_path] + 
self.log.debug("Max retry reached, giving up on %s" % inner_path) return False # Still no info for file task = self.worker_manager.addTask(inner_path, peer, priority=priority) diff --git a/src/Site/SiteStorage.py b/src/Site/SiteStorage.py index bc276b3d..6edd77dc 100644 --- a/src/Site/SiteStorage.py +++ b/src/Site/SiteStorage.py @@ -3,6 +3,7 @@ import re import shutil import json import time +import sys import sqlite3 import gevent.event @@ -18,7 +19,7 @@ class SiteStorage: def __init__(self, site, allow_create=True): self.site = site self.directory = "%s/%s" % (config.data_dir, self.site.address) # Site data diretory - self.allowed_dir = os.path.abspath(self.directory) # Only serve/modify file within this dir + self.allowed_dir = os.path.abspath(self.directory.decode(sys.getfilesystemencoding())) # Only serve/modify file within this dir self.log = site.log self.db = None # Db class self.db_checked = False # Checked db tables since startup diff --git a/src/Worker/Worker.py b/src/Worker/Worker.py index 963aed54..1d0e9fe2 100644 --- a/src/Worker/Worker.py +++ b/src/Worker/Worker.py @@ -73,7 +73,7 @@ class Worker(object): task["failed"].append(self.peer) self.task = None self.peer.hash_failed += 1 - if self.peer.hash_failed >= max(len(self.manager.tasks), 3): + if self.peer.hash_failed >= max(len(self.manager.tasks), 3) or self.peer.connection_error > 10: # Broken peer: More fails than tasks number but atleast 3 break task["workers_num"] -= 1