Rev477, FindHashId command for optional files, Renaming some variables to make them clearer, Optional files downloading in WorkerManager, Test FindHash, Test Optional download, Test FindOptional

This commit is contained in:
HelloZeroNet 2015-10-22 11:42:55 +02:00
parent e8ce26587d
commit a8dae8dd85
9 changed files with 348 additions and 93 deletions

View file

@ -8,7 +8,7 @@ class Config(object):
def __init__(self, argv): def __init__(self, argv):
self.version = "0.3.2" self.version = "0.3.2"
self.rev = 473 self.rev = 477
self.argv = argv self.argv = argv
self.action = None self.action = None
self.createParser() self.createParser()

View file

@ -67,6 +67,8 @@ class FileRequest(object):
self.actionListModified(params) self.actionListModified(params)
elif cmd == "getHashfield": elif cmd == "getHashfield":
self.actionGetHashfield(params) self.actionGetHashfield(params)
elif cmd == "findHashIds":
self.actionFindHashIds(params)
elif cmd == "ping": elif cmd == "ping":
self.actionPing() self.actionPing()
else: else:
@ -272,6 +274,17 @@ class FileRequest(object):
self.response({"hashfield_raw": site.content_manager.hashfield.tostring()}) self.response({"hashfield_raw": site.content_manager.hashfield.tostring()})
# Search the site's locally known peers for the given optional file hash_ids
# and answer with the packed addresses of the peers that hold them.
# Request params: {"site": site_address, "hash_ids": [int, ...]}
# Response: {"peers": {hash_id: [packed "ip:port", ...], ...}} or {"error": ...}
def actionFindHashIds(self, params):
    site = self.sites.get(params["site"])
    if not site or not site.settings["serving"]:  # Site unknown or not serving
        self.response({"error": "Unknown site"})
        return False
    found = site.worker_manager.findOptionalHashIds(params["hash_ids"])
    # Pack the peer addresses to the compact binary form.
    # .items() instead of .iteritems(): identical on Python 2, also valid on Python 3.
    back = {
        hash_id: [helper.packAddress(peer.ip, peer.port) for peer in peers]
        for hash_id, peers in found.items()
    }
    self.response({"peers": back})
# Send a simple Pong! answer # Send a simple Pong! answer
def actionPing(self): def actionPing(self):

View file

@ -1,6 +1,5 @@
import logging import logging
import time import time
import array
import gevent import gevent
@ -17,8 +16,8 @@ if config.use_tempfiles:
# Communicate remote peers # Communicate remote peers
class Peer(object): class Peer(object):
__slots__ = ( __slots__ = (
"ip", "port", "site", "key", "connection", "last_found", "last_response", "last_ping", "last_hashfield", "ip", "port", "site", "key", "connection", "time_found", "time_response", "time_hashfield", "time_added",
"hashfield", "added", "connection_error", "hash_failed", "download_bytes", "download_time" "last_ping", "hashfield", "connection_error", "hash_failed", "download_bytes", "download_time"
) )
def __init__(self, ip, port, site=None): def __init__(self, ip, port, site=None):
@ -29,11 +28,11 @@ class Peer(object):
self.connection = None self.connection = None
self.hashfield = PeerHashfield() # Got optional files hash_id self.hashfield = PeerHashfield() # Got optional files hash_id
self.last_hashfield = 0 # Last time hashfield downloaded self.time_hashfield = None # Last time hashfield downloaded
self.last_found = time.time() # Time of last found in the torrent tracker self.time_found = time.time() # Time of last found in the torrent tracker
self.last_response = None # Time of last successful response from peer self.time_response = None # Time of last successful response from peer
self.time_added = time.time()
self.last_ping = None # Last response time for ping self.last_ping = None # Last response time for ping
self.added = time.time()
self.connection_error = 0 # Series of connection error self.connection_error = 0 # Series of connection error
self.hash_failed = 0 # Number of bad files from peer self.hash_failed = 0 # Number of bad files from peer
@ -86,7 +85,7 @@ class Peer(object):
# Found a peer on tracker # Found a peer on tracker
def found(self): def found(self):
self.last_found = time.time() self.time_found = time.time()
# Send a command to peer # Send a command to peer
def request(self, cmd, params={}, stream_to=None): def request(self, cmd, params={}, stream_to=None):
@ -98,16 +97,16 @@ class Peer(object):
for retry in range(1, 3): # Retry 3 times for retry in range(1, 3): # Retry 3 times
try: try:
response = self.connection.request(cmd, params, stream_to) res = self.connection.request(cmd, params, stream_to)
if not response: if not res:
raise Exception("Send error") raise Exception("Send error")
if "error" in response: if "error" in res:
self.log("%s error: %s" % (cmd, response["error"])) self.log("%s error: %s" % (cmd, res["error"]))
self.onConnectionError() self.onConnectionError()
else: # Successful request, reset connection error num else: # Successful request, reset connection error num
self.connection_error = 0 self.connection_error = 0
self.last_response = time.time() self.time_response = time.time()
return response return res
except Exception, err: except Exception, err:
if type(err).__name__ == "Notify": # Greenlet killed by worker if type(err).__name__ == "Notify": # Greenlet killed by worker
self.log("Peer worker got killed: %s, aborting cmd: %s" % (err.message, cmd)) self.log("Peer worker got killed: %s, aborting cmd: %s" % (err.message, cmd))
@ -136,21 +135,21 @@ class Peer(object):
s = time.time() s = time.time()
while True: # Read in 512k parts while True: # Read in 512k parts
back = self.request("getFile", {"site": site, "inner_path": inner_path, "location": location}) res = self.request("getFile", {"site": site, "inner_path": inner_path, "location": location})
if not back or "body" not in back: # Error if not res or "body" not in res: # Error
return False return False
buff.write(back["body"]) buff.write(res["body"])
back["body"] = None # Save memory res["body"] = None # Save memory
if back["location"] == back["size"]: # End of file if res["location"] == res["size"]: # End of file
break break
else: else:
location = back["location"] location = res["location"]
self.download_bytes += back["location"] self.download_bytes += res["location"]
self.download_time += (time.time() - s) self.download_time += (time.time() - s)
self.site.settings["bytes_recv"] = self.site.settings.get("bytes_recv", 0) + back["location"] self.site.settings["bytes_recv"] = self.site.settings.get("bytes_recv", 0) + res["location"]
buff.seek(0) buff.seek(0)
return buff return buff
@ -164,20 +163,20 @@ class Peer(object):
s = time.time() s = time.time()
while True: # Read in 512k parts while True: # Read in 512k parts
back = self.request("streamFile", {"site": site, "inner_path": inner_path, "location": location}, stream_to=buff) res = self.request("streamFile", {"site": site, "inner_path": inner_path, "location": location}, stream_to=buff)
if not back: # Error if not res: # Error
self.log("Invalid response: %s" % back) self.log("Invalid response: %s" % res)
return False return False
if back["location"] == back["size"]: # End of file if res["location"] == res["size"]: # End of file
break break
else: else:
location = back["location"] location = res["location"]
self.download_bytes += back["location"] self.download_bytes += res["location"]
self.download_time += (time.time() - s) self.download_time += (time.time() - s)
self.site.settings["bytes_recv"] = self.site.settings.get("bytes_recv", 0) + back["location"] self.site.settings["bytes_recv"] = self.site.settings.get("bytes_recv", 0) + res["location"]
buff.seek(0) buff.seek(0)
return buff return buff
@ -187,9 +186,9 @@ class Peer(object):
for retry in range(1, 3): # Retry 3 times for retry in range(1, 3): # Retry 3 times
s = time.time() s = time.time()
with gevent.Timeout(10.0, False): # 10 sec timeout, don't raise exception with gevent.Timeout(10.0, False): # 10 sec timeout, don't raise exception
response = self.request("ping") res = self.request("ping")
if response and "body" in response and response["body"] == "Pong!": if res and "body" in res and res["body"] == "Pong!":
response_time = time.time() - s response_time = time.time() - s
break # All fine, exit from for loop break # All fine, exit from for loop
# Timeout reached or bad response # Timeout reached or bad response
@ -210,11 +209,11 @@ class Peer(object):
site = self.site # If no site defined request peers for this site site = self.site # If no site defined request peers for this site
# give him/her 5 connectible peers # give him/her 5 connectible peers
packed_peers = [peer.packMyAddress() for peer in self.site.getConnectablePeers(5)] packed_peers = [peer.packMyAddress() for peer in self.site.getConnectablePeers(5)]
response = self.request("pex", {"site": site.address, "peers": packed_peers, "need": need_num}) res = self.request("pex", {"site": site.address, "peers": packed_peers, "need": need_num})
if not response or "error" in response: if not res or "error" in res:
return False return False
added = 0 added = 0
for peer in response.get("peers", []): for peer in res.get("peers", []):
address = helper.unpackAddress(peer) address = helper.unpackAddress(peer)
if site.addPeer(*address): if site.addPeer(*address):
added += 1 added += 1
@ -222,21 +221,31 @@ class Peer(object):
self.log("Added peers using pex: %s" % added) self.log("Added peers using pex: %s" % added)
return added return added
# Request optional files hashfield from peer
def getHashfield(self):
    """Download this peer's optional-files hashfield.

    Records the request time on self.last_hashfield, then issues a
    getHashfield request. On success the local copy is replaced from the
    raw response and returned; on any error False is returned.
    """
    self.last_hashfield = time.time()
    res = self.request("getHashfield", {"site": self.site.address})
    if not res or "error" in res:  # Request failed or remote error
        return False
    self.hashfield.replaceFromString(res["hashfield_raw"])
    return self.hashfield
# List modified files since the date # List modified files since the date
# Return: {inner_path: modification date,...} # Return: {inner_path: modification date,...}
def listModified(self, since): def listModified(self, since):
return self.request("listModified", {"since": since, "site": self.site.address}) return self.request("listModified", {"since": since, "site": self.site.address})
# Update the optional-files hashfield from the peer
# Return: the hashfield on success, False on error or when the local copy is fresh
def updateHashfield(self, force=False):
    # Don't update hashfield again in 15 min.
    # Bug fix: the original compared with ">", which skipped only *stale*
    # data and re-downloaded a fresh hashfield on every call — the throttle
    # must skip when the last download was LESS than 15 min ago.
    if self.time_hashfield and time.time() - self.time_hashfield < 60 * 15 and not force:
        return False
    self.time_hashfield = time.time()
    res = self.request("getHashfield", {"site": self.site.address})
    if not res or "error" in res:  # Request failed or remote error
        return False
    self.hashfield.replaceFromString(res["hashfield_raw"])
    return self.hashfield
# Ask this peer which of its known peers hold the given optional hash_ids.
# Return: {hash_id: [(ip, port), ...], ...} or False on error
def findHashIds(self, hash_ids):
    res = self.request("findHashIds", {"site": self.site.address, "hash_ids": hash_ids})
    if not res or "error" in res:  # Request failed or remote error
        return False
    # Unpack the compact binary addresses.
    # .items() + list comprehension instead of .iteritems() + map():
    # identical result on Python 2, also valid on Python 3.
    return {
        hash_id: [helper.unpackAddress(packed) for packed in packed_peers]
        for hash_id, packed_peers in res["peers"].items()
    }
# Stop and remove from site # Stop and remove from site
def remove(self): def remove(self):
self.log("Removing peer...Connection error: %s, Hash failed: %s" % (self.connection_error, self.hash_failed)) self.log("Removing peer...Connection error: %s, Hash failed: %s" % (self.connection_error, self.hash_failed))
@ -245,34 +254,6 @@ class Peer(object):
if self.connection: if self.connection:
self.connection.close() self.connection.close()
# - HASHFIELD -
# NOTE(review): this is the legacy hashfield API (removed side of the diff),
# superseded by updateHashfield()/findHashIds() above.

def updateHashfield(self, force=False):
    # Don't update hashfield again in 15 min
    # NOTE(review): the condition looks inverted — ">" skips stale data and
    # refreshes fresh data; the throttle presumably wanted "<". Confirm
    # against the replacement implementation.
    if self.last_hashfield and time.time() - self.last_hashfield > 60 * 15 and not force:
        return False
    response = self.request("getHashfield", {"site": self.site.address})
    if not response or "error" in response:  # Request failed or remote error
        return False
    self.last_hashfield = time.time()
    # Stores the response value directly (not parsed from a raw string here)
    self.hashfield = response["hashfield"]
    return self.hashfield

def setHashfield(self, hashfield_dump):
    # Load the hashfield from its serialized (array tostring) form
    self.hashfield.fromstring(hashfield_dump)

def hasHash(self, hash_id):
    # True if this peer is known to have the optional file with hash_id
    return hash_id in self.hashfield

# Return: ["ip:port", "ip:port",...]
def findHash(self, hash_id):
    # Ask the peer who else has the optional file with this hash_id
    response = self.request("findHash", {"site": self.site.address, "hash_id": hash_id})
    if not response or "error" in response:  # Request failed or remote error
        return False
    return [helper.unpackAddress(peer) for peer in response["peers"]]
# - EVENTS - # - EVENTS -
# On connection error # On connection error

View file

@ -23,6 +23,13 @@ class PeerHashfield():
else: else:
return False return False
def appendHashId(self, hash_id):
    """Add an already-computed hash_id to the hashfield.

    Return True if it was newly added, False if it was already present.
    """
    if hash_id in self:  # Already tracked: nothing to do
        return False
    self.append(hash_id)
    return True
def removeHash(self, hash): def removeHash(self, hash):
hash_id = int(hash[0:4], 16) hash_id = int(hash[0:4], 16)
if hash_id in self: if hash_id in self:

View file

@ -39,7 +39,7 @@ class Site:
self.content = None # Load content.json self.content = None # Load content.json
self.peers = {} # Key: ip:port, Value: Peer.Peer self.peers = {} # Key: ip:port, Value: Peer.Peer
self.peer_blacklist = SiteManager.peer_blacklist # Ignore this peers (eg. myself) self.peer_blacklist = SiteManager.peer_blacklist # Ignore this peers (eg. myself)
self.last_announce = 0 # Last announce time to tracker self.time_announce = 0 # Last announce time to tracker
self.last_tracker_id = random.randint(0, 10) # Last announced tracker id self.last_tracker_id = random.randint(0, 10) # Last announced tracker id
self.worker_manager = WorkerManager(self) # Handle site download from other peers self.worker_manager = WorkerManager(self) # Handle site download from other peers
self.bad_files = {} # SHA check failed files, need to redownload {"inner.content": 1} (key: file, value: failed accept) self.bad_files = {} # SHA check failed files, need to redownload {"inner.content": 1} (key: file, value: failed accept)
@ -589,9 +589,9 @@ class Site:
# Add myself and get other peers from tracker # Add myself and get other peers from tracker
def announce(self, force=False, num=5, pex=True): def announce(self, force=False, num=5, pex=True):
if time.time() < self.last_announce + 30 and not force: if time.time() < self.time_announce + 30 and not force:
return # No reannouncing within 30 secs return # No reannouncing within 30 secs
self.last_announce = time.time() self.time_announce = time.time()
trackers = config.trackers trackers = config.trackers
if num == 1: # Only announce on one tracker, increment the queried tracker id if num == 1: # Only announce on one tracker, increment the queried tracker id
@ -712,7 +712,7 @@ class Site:
continue continue
if peer.connection and not peer.connection.connected: if peer.connection and not peer.connection.connected:
peer.connection = None # Dead connection peer.connection = None # Dead connection
if time.time() - peer.last_found > 60 * 60 * 4: # Not found on tracker or via pex in last 4 hour if time.time() - peer.time_found > 60 * 60 * 4: # Not found on tracker or via pex in last 4 hour
peer.remove() peer.remove()
removed += 1 removed += 1
if removed > 5: # Don't remove too much at once if removed > 5: # Don't remove too much at once

View file

@ -8,7 +8,7 @@ from cStringIO import StringIO
@pytest.mark.usefixtures("resetSettings") @pytest.mark.usefixtures("resetSettings")
@pytest.mark.usefixtures("resetTempSettings") @pytest.mark.usefixtures("resetTempSettings")
class TestFileRequest: class TestPeer:
def testPing(self, file_server, site, site_temp): def testPing(self, file_server, site, site_temp):
file_server.ip_incoming = {} # Reset flood protection file_server.ip_incoming = {} # Reset flood protection
file_server.sites[site.address] = site file_server.sites[site.address] = site
@ -53,9 +53,13 @@ class TestFileRequest:
def testHashfield(self, site): def testHashfield(self, site):
sample_hash = site.content_manager.contents["content.json"]["files_optional"].values()[0]["sha512"] sample_hash = site.content_manager.contents["content.json"]["files_optional"].values()[0]["sha512"]
assert not site.content_manager.hashfield
site.storage.verifyFiles(quick_check=True) # Find what optional files we have site.storage.verifyFiles(quick_check=True) # Find what optional files we have
# Check if hashfield has any files # Check if hashfield has any files
assert site.content_manager.hashfield
assert len(site.content_manager.hashfield) > 0 assert len(site.content_manager.hashfield) > 0
# Check existing hash # Check existing hash
@ -89,8 +93,35 @@ class TestFileRequest:
# Testing hashfield sync # Testing hashfield sync
assert len(peer_file_server.hashfield) == 0 assert len(peer_file_server.hashfield) == 0
assert peer_file_server.getHashfield() assert peer_file_server.updateHashfield()
assert len(peer_file_server.hashfield) > 0 assert len(peer_file_server.hashfield) > 0
connection.close() connection.close()
client.stop() client.stop()
def testFindHash(self, file_server, site, site_temp):
    """findHashIds should return the peers known to hold the requested optional hash ids."""
    file_server.ip_incoming = {}  # Reset flood protection
    file_server.sites[site.address] = site
    client = FileServer("127.0.0.1", 1545)
    client.sites[site_temp.address] = site_temp
    site_temp.connection_server = client

    # Add file_server as peer to client
    peer_file_server = site_temp.addPeer("127.0.0.1", 1544)

    # Nobody is known to hold this hash yet
    assert peer_file_server.findHashIds([1234]) == {}

    # Add fake peers with the required hashes
    peer_a = site.addPeer("1.2.3.4", 1544)
    peer_a.hashfield.append(1234)
    peer_b = site.addPeer("1.2.3.5", 1545)
    peer_b.hashfield.append(1234)
    peer_b.hashfield.append(1235)
    peer_c = site.addPeer("1.2.3.6", 1546)
    peer_c.hashfield.append(1235)
    peer_c.hashfield.append(1236)

    # Both queried hashes now resolve to the peers holding them
    assert peer_file_server.findHashIds([1234, 1235]) == {
        1234: [("1.2.3.4", 1544), ("1.2.3.5", 1545)],
        1235: [("1.2.3.5", 1545), ("1.2.3.6", 1546)]
    }

View file

@ -1,9 +1,14 @@
import time
import pytest import pytest
import mock import mock
import gevent
from Connection import ConnectionServer from Connection import ConnectionServer
from Config import config from Config import config
from File import FileRequest from File import FileRequest
from File import FileServer
from Site import Site
import Spy import Spy
@ -11,7 +16,8 @@ import Spy
@pytest.mark.usefixtures("resetSettings") @pytest.mark.usefixtures("resetSettings")
class TestSiteDownload: class TestSiteDownload:
def testDownload(self, file_server, site, site_temp): def testDownload(self, file_server, site, site_temp):
client = ConnectionServer("127.0.0.1", 1545) file_server.ip_incoming = {} # Reset flood protection
assert site.storage.directory == config.data_dir + "/" + site.address assert site.storage.directory == config.data_dir + "/" + site.address
assert site_temp.storage.directory == config.data_dir + "-temp/" + site.address assert site_temp.storage.directory == config.data_dir + "-temp/" + site.address
@ -20,6 +26,7 @@ class TestSiteDownload:
file_server.sites[site.address] = site file_server.sites[site.address] = site
# Init client server # Init client server
client = ConnectionServer("127.0.0.1", 1545)
site_temp.connection_server = client site_temp.connection_server = client
site_temp.announce = mock.MagicMock(return_value=True) # Don't try to find peers from the net site_temp.announce = mock.MagicMock(return_value=True) # Don't try to find peers from the net
@ -48,7 +55,33 @@ class TestSiteDownload:
assert len(site_temp.content_manager.contents) == len(site.content_manager.contents) - 1 assert len(site_temp.content_manager.contents) == len(site.content_manager.contents) - 1
assert not bad_files assert not bad_files
# Optional file [connection.close() for connection in file_server.connections]
# Test when connected peer has the optional file
def testOptionalDownload(self, file_server, site, site_temp):
file_server.ip_incoming = {} # Reset flood protection
# Init source server
site.connection_server = file_server
file_server.sites[site.address] = site
# Init client server
client = ConnectionServer("127.0.0.1", 1545)
site_temp.connection_server = client
site_temp.announce = mock.MagicMock(return_value=True) # Don't try to find peers from the net
site_temp.addPeer("127.0.0.1", 1544)
# Download site
site_temp.download(blind_includes=True).join(timeout=5)
# Download optional data/optional.txt
site.storage.verifyFiles(quick_check=True) # Find what optional files we have
optional_file_info = site_temp.content_manager.getFileInfo("data/optional.txt")
assert site.content_manager.hashfield.hasHash(optional_file_info["sha512"])
assert not site_temp.content_manager.hashfield.hasHash(optional_file_info["sha512"])
assert not site_temp.storage.isFile("data/optional.txt") assert not site_temp.storage.isFile("data/optional.txt")
assert site.storage.isFile("data/optional.txt") assert site.storage.isFile("data/optional.txt")
site_temp.needFile("data/optional.txt") site_temp.needFile("data/optional.txt")
@ -59,6 +92,7 @@ class TestSiteDownload:
optional_file_info = site_temp.content_manager.getFileInfo( optional_file_info = site_temp.content_manager.getFileInfo(
"data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif" "data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif"
) )
assert site.content_manager.hashfield.hasHash(optional_file_info["sha512"])
assert not site_temp.content_manager.hashfield.hasHash(optional_file_info["sha512"]) assert not site_temp.content_manager.hashfield.hasHash(optional_file_info["sha512"])
site_temp.needFile("data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif") site_temp.needFile("data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif")
@ -66,3 +100,46 @@ class TestSiteDownload:
assert site_temp.content_manager.hashfield.hasHash(optional_file_info["sha512"]) assert site_temp.content_manager.hashfield.hasHash(optional_file_info["sha512"])
assert site_temp.storage.deleteFiles() assert site_temp.storage.deleteFiles()
[connection.close() for connection in file_server.connections]
# Test when the connected peer does not have the file, so we ask it whether it
# knows someone who has it
def testFindOptional(self, file_server, site, site_temp):
    file_server.ip_incoming = {}  # Reset flood protection

    # Init source server
    site.connection_server = file_server
    file_server.sites[site.address] = site

    # Init full source server (has optional files)
    site_full = Site("1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT")
    file_server_full = FileServer("127.0.0.1", 1546)
    site_full.connection_server = file_server_full
    gevent.spawn(lambda: ConnectionServer.start(file_server_full))
    time.sleep(0)  # Port opening
    file_server_full.sites[site_full.address] = site_full  # Add site
    site_full.storage.verifyFiles(quick_check=True)  # Check optional files
    site_full_peer = site.addPeer("127.0.0.1", 1546)  # Add it to source server
    assert site_full_peer.updateHashfield()  # Update hashfield

    # Init client server
    site_temp.connection_server = ConnectionServer("127.0.0.1", 1545)
    site_temp.announce = mock.MagicMock(return_value=True)  # Don't try to find peers from the net
    site_temp.addPeer("127.0.0.1", 1544)  # Add source server

    # Download normal files
    site_temp.download(blind_includes=True).join(timeout=5)

    # Download optional data/optional.txt
    optional_file_info = site_temp.content_manager.getFileInfo("data/optional.txt")
    assert not site_temp.storage.isFile("data/optional.txt")
    assert not site.content_manager.hashfield.hasHash(optional_file_info["sha512"])  # Source server doesn't know it has the file
    assert site_full_peer.hashfield.hasHash(optional_file_info["sha512"])  # Source full peer on source server has the file
    assert site_full.content_manager.hashfield.hasHash(optional_file_info["sha512"])  # Source full server has the file

    with Spy.Spy(FileRequest, "route") as requests:
        site_temp.needFile("data/optional.txt")
    # Bug fix: removed leftover "print requests" debug statement (Python 2
    # print leftover from development; it only spammed test output)

    assert site_temp.storage.deleteFiles()
    file_server_full.stop()
    [connection.close() for connection in file_server.connections]

View file

@ -1,10 +1,13 @@
import time import time
import logging import logging
import random import random
import collections
import gevent import gevent
from Worker import Worker from Worker import Worker
from util import helper
import util
MAX_WORKERS = 10 # Max concurent workers MAX_WORKERS = 10 # Max concurent workers
@ -15,7 +18,7 @@ class WorkerManager:
self.site = site self.site = site
self.workers = {} # Key: ip:port, Value: Worker.Worker self.workers = {} # Key: ip:port, Value: Worker.Worker
self.tasks = [] self.tasks = []
# {"evt": evt, "workers_num": 0, "site": self.site, "inner_path": inner_path, "done": False, # {"evt": evt, "workers_num": 0, "site": self.site, "inner_path": inner_path, "done": False, "optional_hash_id": None,
# "time_started": None, "time_added": time.time(), "peers": peers, "priority": 0, "failed": peer_ids} # "time_started": None, "time_added": time.time(), "peers": peers, "priority": 0, "failed": peer_ids}
self.started_task_num = 0 # Last added task num self.started_task_num = 0 # Last added task num
self.running = True self.running = True
@ -60,8 +63,15 @@ class WorkerManager:
elif (task["time_started"] and time.time() >= task["time_started"] + 15) or not self.workers: elif (task["time_started"] and time.time() >= task["time_started"] + 15) or not self.workers:
# Task started more than 15 sec ago or no workers # Task started more than 15 sec ago or no workers
self.log.debug("Task taking more than 15 secs, find more peers: %s" % task["inner_path"]) workers = self.findWorkers(task)
self.log.debug(
"Task taking more than 15 secs, workers: %s find more peers: %s" %
(len(workers), task["inner_path"])
)
task["site"].announce(num=1) # Find more peers task["site"].announce(num=1) # Find more peers
if task["optional_hash_id"]:
self.startFindOptional()
else:
if task["peers"]: # Release the peer lock if task["peers"]: # Release the peer lock
self.log.debug("Task peer lock release: %s" % task["inner_path"]) self.log.debug("Task peer lock release: %s" % task["inner_path"])
task["peers"] = [] task["peers"] = []
@ -100,6 +110,8 @@ class WorkerManager:
continue # This peer not allowed to pick this task continue # This peer not allowed to pick this task
if peer in task["failed"]: if peer in task["failed"]:
continue # Peer already tried to solve this, but failed continue # Peer already tried to solve this, but failed
if task["optional_hash_id"] and task["peers"] is None:
continue # No peers found yet for the optional task
return task return task
# New peers added to site # New peers added to site
@ -135,6 +147,114 @@ class WorkerManager:
if worker: if worker:
self.log.debug("Added worker: %s, workers: %s/%s" % (peer.key, len(self.workers), MAX_WORKERS)) self.log.debug("Added worker: %s, workers: %s/%s" % (peer.key, len(self.workers), MAX_WORKERS))
# Find peers for optional-file hashes in the locally known hashfields and
# register them on the matching tasks' peer lists.
# Return: {found_hash_id: [peer1, peer2, ...], ...}
def findOptionalTasks(self, optional_tasks):
    found = collections.defaultdict(list)  # {found_hash_id: [peer1, peer2, ...], ...}
    for peer in self.site.peers.values():
        if not peer.hashfield:  # No optional files known for this peer
            continue
        for task in optional_tasks:
            optional_hash_id = task["optional_hash_id"]
            if optional_hash_id in peer.hashfield:
                found[optional_hash_id].append(peer)
                # Bug fix: register the peer without clobbering the task's
                # existing peer list. The original `if task["peers"] and peer
                # not in task["peers"]: append else: task["peers"] = [peer]`
                # reset the list to [peer] whenever the peer was already
                # listed, discarding every other previously found peer.
                if task["peers"] is None:
                    task["peers"] = []
                if peer not in task["peers"]:
                    task["peers"].append(peer)
    return found
# Find peers for optional hash ids in the locally known hashfields
# Return: {found_hash_id: [peer1, peer2, ...], ...}
def findOptionalHashIds(self, optional_hash_ids):
    """Collect, per requested hash_id, every known peer whose hashfield contains it."""
    found = collections.defaultdict(list)
    for peer in self.site.peers.values():
        if not peer.hashfield:  # Nothing known about this peer's optional files
            continue
        matching = [hash_id for hash_id in optional_hash_ids if hash_id in peer.hashfield]
        for hash_id in matching:
            found[hash_id].append(peer)
    return found
# Add remotely found peers to the matching optional-file tasks.
# found_ips: {hash_id: [(ip, port), ...], ...}
# Return: {hash_id: [peer1, peer2, ...], ...} of the peers actually added
def addOptionalPeers(self, found_ips):
    found = collections.defaultdict(list)
    # .items() instead of .iteritems(): identical on Python 2, also valid on Python 3
    for hash_id, peer_ips in found_ips.items():
        tasks = [task for task in self.tasks if task["optional_hash_id"] == hash_id]
        if not tasks:  # No task is waiting for this hash anymore
            continue
        task = tasks[0]  # Found task, lets take the first
        for peer_ip in peer_ips:
            peer = self.site.addPeer(peer_ip[0], peer_ip[1], return_peer=True)
            if not peer:
                continue
            if task["peers"] is None:
                task["peers"] = []
            if peer not in task["peers"]:
                task["peers"].append(peer)
            peer.hashfield.appendHashId(hash_id)  # Peer has this file
            found[hash_id].append(peer)
    return found
# Start find peers for optional files
@util.Noparallel(blocking=False)
def startFindOptional(self):
    """Locate peers for all pending optional-file tasks, escalating in three stages:
    local hashfield tables, then hashfield refresh from connected peers, then a
    findHashIds query to peers. Workers are started for every peer found."""
    time.sleep(0.01)  # Wait for more file requests
    optional_tasks = [task for task in self.tasks if task["optional_hash_id"]]
    optional_hash_ids = set([task["optional_hash_id"] for task in optional_tasks])
    self.log.debug("Finding peers for optional files: %s" % optional_hash_ids)

    # Stage 1: search the hashfields we already have locally
    found = self.findOptionalTasks(optional_tasks)

    if found:
        found_peers = set([peer for peers in found.values() for peer in peers])
        self.startWorkers(found_peers)

    if len(found) < len(optional_hash_ids):
        self.log.debug("No local result for optional files: %s" % (optional_hash_ids - set(found)))
        # Query hashfield from connected peers
        threads = []
        peers = self.site.getConnectedPeers()
        if not peers:
            peers = self.site.getConnectablePeers()
        for peer in peers:
            if not peer.time_hashfield:  # Only peers we never asked before
                threads.append(gevent.spawn(peer.updateHashfield))
        gevent.joinall(threads, timeout=5)
        # Stage 2: re-run the local search with the freshly downloaded hashfields
        found = self.findOptionalTasks(optional_tasks)
        if found:
            found_peers = set([peer for hash_id_peers in found.values() for peer in hash_id_peers])
            self.startWorkers(found_peers)

    if len(found) < len(optional_hash_ids):
        self.log.debug("No connected hashtable result for optional files: %s" % (optional_hash_ids - set(found)))
        # Try to query connected peers
        threads = []
        peers = self.site.getConnectedPeers()
        if not peers:
            peers = self.site.getConnectablePeers()
        for peer in peers:
            threads.append(gevent.spawn(peer.findHashIds, list(optional_hash_ids)))
        gevent.joinall(threads, timeout=5)
        # Stage 3: merge every peer's answer and register the reported holders
        found_ips = helper.mergeDicts([thread.value for thread in threads if thread.value])
        found = self.addOptionalPeers(found_ips)
        if found:
            found_peers = set([peer for hash_id_peers in found.values() for peer in hash_id_peers])
            self.startWorkers(found_peers)

    if len(found) < len(optional_hash_ids):
        self.log.debug("No findhash result for optional files: %s" % (optional_hash_ids - set(found)))
# Stop all worker # Stop all worker
def stopWorkers(self): def stopWorkers(self):
for worker in self.workers.values(): for worker in self.workers.values():
@ -181,16 +301,27 @@ class WorkerManager:
peers = [peer] # Only download from this peer peers = [peer] # Only download from this peer
else: else:
peers = None peers = None
file_info = self.site.content_manager.getFileInfo(inner_path)
if file_info and file_info["optional"]:
optional_hash_id = helper.toHashId(file_info["sha512"])
else:
optional_hash_id = None
task = { task = {
"evt": evt, "workers_num": 0, "site": self.site, "inner_path": inner_path, "done": False, "evt": evt, "workers_num": 0, "site": self.site, "inner_path": inner_path, "done": False, "optional_hash_id": optional_hash_id,
"time_added": time.time(), "time_started": None, "peers": peers, "priority": priority, "failed": [] "time_added": time.time(), "time_started": None, "time_action": None, "peers": peers, "priority": priority, "failed": []
} }
self.tasks.append(task) self.tasks.append(task)
self.started_task_num += 1 self.started_task_num += 1
self.log.debug( self.log.debug(
"New task: %s, peer lock: %s, priority: %s, tasks: %s" % "New task: %s, peer lock: %s, priority: %s, optional_hash_id: %s, tasks: %s" %
(task["inner_path"], peers, priority, self.started_task_num) (task["inner_path"], peers, priority, optional_hash_id, self.started_task_num)
) )
if optional_hash_id:
self.startFindOptional()
self.startWorkers(peers) self.startWorkers(peers)
return evt return evt

View file

@ -2,6 +2,7 @@ import os
import socket import socket
import struct import struct
import re import re
import collections
def atomicWrite(dest, content, mode="w"): def atomicWrite(dest, content, mode="w"):
@ -44,3 +45,17 @@ def getDirname(path):
# Return: data/site/content.json -> content.json # Return: data/site/content.json -> content.json
def getFilename(path): def getFilename(path):
return re.sub("^.*/", "", path) return re.sub("^.*/", "", path)
# Convert hash to hashid for hashfield
def toHashId(hash):
    """Return the int value of the hash's first 4 hex characters (the 2-byte hash id)."""
    return int(hash[:4], 16)
# Merge dict values into sets keyed by the union of all keys
# Example: [{"a": [1]}, {"a": [2], "b": [3]}] -> {"a": {1, 2}, "b": {3}}
def mergeDicts(dicts):
    back = collections.defaultdict(set)
    for d in dicts:
        # .items() instead of .iteritems(): identical on Python 2, also valid on Python 3
        for key, val in d.items():
            back[key].update(val)
    return dict(back)