diff --git a/build-docker-images.sh b/build-docker-images.sh new file mode 100755 index 00000000..8eff34f4 --- /dev/null +++ b/build-docker-images.sh @@ -0,0 +1,32 @@ +#!/bin/sh +set -e + +arg_push= + +case "$1" in + --push) arg_push=y ; shift ;; +esac + +default_suffix=alpine +prefix="${1:-local/}" + +for dokerfile in dockerfiles/Dockerfile.* ; do + suffix="`echo "$dokerfile" | sed 's/.*\/Dockerfile\.//'`" + image_name="${prefix}zeronet:$suffix" + + latest="" + t_latest="" + if [ "$suffix" = "$default_suffix" ] ; then + latest="${prefix}zeronet:latest" + t_latest="-t ${latest}" + fi + + echo "DOCKER BUILD $image_name" + docker build -f "$dokerfile" -t "$image_name" $t_latest . + if [ -n "$arg_push" ] ; then + docker push "$image_name" + if [ -n "$latest" ] ; then + docker push "$latest" + fi + fi +done diff --git a/dockerfiles/Dockerfile.alpine b/dockerfiles/Dockerfile.alpine new file mode 120000 index 00000000..0f848cc8 --- /dev/null +++ b/dockerfiles/Dockerfile.alpine @@ -0,0 +1 @@ +Dockerfile.alpine3.13 \ No newline at end of file diff --git a/dockerfiles/Dockerfile.alpine3.13 b/dockerfiles/Dockerfile.alpine3.13 new file mode 100644 index 00000000..79f15b9b --- /dev/null +++ b/dockerfiles/Dockerfile.alpine3.13 @@ -0,0 +1,44 @@ +# THIS FILE IS AUTOGENERATED BY gen-dockerfiles.sh. +# SEE zeronet-Dockerfile FOR THE SOURCE FILE. + +FROM alpine:3.13 + +# Base settings +ENV HOME /root + +# Install packages + +# Install packages + +COPY install-dep-packages.sh /root/install-dep-packages.sh + +RUN /root/install-dep-packages.sh install + +COPY requirements.txt /root/requirements.txt + +RUN pip3 install -r /root/requirements.txt \ + && /root/install-dep-packages.sh remove-makedeps \ + && echo "ControlPort 9051" >> /etc/tor/torrc \ + && echo "CookieAuthentication 1" >> /etc/tor/torrc + +RUN python3 -V \ + && python3 -m pip list \ + && tor --version \ + && openssl version + +# Add Zeronet source + +COPY . /root +VOLUME /root/data + +# Control if Tor proxy is started +ENV ENABLE_TOR false + +WORKDIR /root + +# Set upstart command +CMD (! ${ENABLE_TOR} || tor&) && python3 zeronet.py --ui_ip 0.0.0.0 --fileserver_port 26552 + +# Expose ports +EXPOSE 43110 26552 + diff --git a/dockerfiles/Dockerfile.ubuntu b/dockerfiles/Dockerfile.ubuntu new file mode 120000 index 00000000..29adf7ef --- /dev/null +++ b/dockerfiles/Dockerfile.ubuntu @@ -0,0 +1 @@ +Dockerfile.ubuntu20.04 \ No newline at end of file diff --git a/dockerfiles/Dockerfile.ubuntu20.04 b/dockerfiles/Dockerfile.ubuntu20.04 new file mode 100644 index 00000000..bc32cf86 --- /dev/null +++ b/dockerfiles/Dockerfile.ubuntu20.04 @@ -0,0 +1,44 @@ +# THIS FILE IS AUTOGENERATED BY gen-dockerfiles.sh. +# SEE zeronet-Dockerfile FOR THE SOURCE FILE. + +FROM ubuntu:20.04 + +# Base settings +ENV HOME /root + +# Install packages + +# Install packages + +COPY install-dep-packages.sh /root/install-dep-packages.sh + +RUN /root/install-dep-packages.sh install + +COPY requirements.txt /root/requirements.txt + +RUN pip3 install -r /root/requirements.txt \ + && /root/install-dep-packages.sh remove-makedeps \ + && echo "ControlPort 9051" >> /etc/tor/torrc \ + && echo "CookieAuthentication 1" >> /etc/tor/torrc + +RUN python3 -V \ + && python3 -m pip list \ + && tor --version \ + && openssl version + +# Add Zeronet source + +COPY . /root +VOLUME /root/data + +# Control if Tor proxy is started +ENV ENABLE_TOR false + +WORKDIR /root + +# Set upstart command +CMD (! 
${ENABLE_TOR} || tor&) && python3 zeronet.py --ui_ip 0.0.0.0 --fileserver_port 26552 + +# Expose ports +EXPOSE 43110 26552 + diff --git a/dockerfiles/gen-dockerfiles.sh b/dockerfiles/gen-dockerfiles.sh new file mode 100755 index 00000000..75a6edf6 --- /dev/null +++ b/dockerfiles/gen-dockerfiles.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +set -e + +die() { + echo "$@" > /dev/stderr + exit 1 +} + +for os in alpine:3.13 ubuntu:20.04 ; do + prefix="`echo "$os" | sed -e 's/://'`" + short_prefix="`echo "$os" | sed -e 's/:.*//'`" + + zeronet="zeronet-Dockerfile" + + dockerfile="Dockerfile.$prefix" + dockerfile_short="Dockerfile.$short_prefix" + + echo "GEN $dockerfile" + + if ! test -f "$zeronet" ; then + die "No such file: $zeronet" + fi + + echo "\ +# THIS FILE IS AUTOGENERATED BY gen-dockerfiles.sh. +# SEE $zeronet FOR THE SOURCE FILE. + +FROM $os + +`cat "$zeronet"` +" > "$dockerfile.tmp" && mv "$dockerfile.tmp" "$dockerfile" && ln -s -f "$dockerfile" "$dockerfile_short" +done + diff --git a/Dockerfile.arm64v8 b/dockerfiles/zeronet-Dockerfile similarity index 55% rename from Dockerfile.arm64v8 rename to dockerfiles/zeronet-Dockerfile index d27b7620..92c67c84 100644 --- a/Dockerfile.arm64v8 +++ b/dockerfiles/zeronet-Dockerfile @@ -1,34 +1,36 @@ -FROM alpine:3.12 - -#Base settings +# Base settings ENV HOME /root +# Install packages + +COPY install-dep-packages.sh /root/install-dep-packages.sh + +RUN /root/install-dep-packages.sh install + COPY requirements.txt /root/requirements.txt -#Install ZeroNet -RUN apk --update --no-cache --no-progress add python3 python3-dev gcc libffi-dev musl-dev make tor openssl \ - && pip3 install -r /root/requirements.txt \ - && apk del python3-dev gcc libffi-dev musl-dev make \ +RUN pip3 install -r /root/requirements.txt \ + && /root/install-dep-packages.sh remove-makedeps \ && echo "ControlPort 9051" >> /etc/tor/torrc \ && echo "CookieAuthentication 1" >> /etc/tor/torrc - + RUN python3 -V \ && python3 -m pip list \ && tor --version \ && openssl version -#Add Zeronet source +# Add Zeronet source + COPY . /root VOLUME /root/data -#Control if Tor proxy is started +# Control if Tor proxy is started ENV ENABLE_TOR false WORKDIR /root -#Set upstart command +# Set upstart command CMD (! ${ENABLE_TOR} || tor&) && python3 zeronet.py --ui_ip 0.0.0.0 --fileserver_port 26552 -#Expose ports +# Expose ports EXPOSE 43110 26552 - diff --git a/install-dep-packages.sh b/install-dep-packages.sh new file mode 100755 index 00000000..655a33aa --- /dev/null +++ b/install-dep-packages.sh @@ -0,0 +1,49 @@ +#!/bin/sh +set -e + +do_alpine() { + local deps="python3 py3-pip openssl tor" + local makedeps="python3-dev gcc g++ libffi-dev musl-dev make automake autoconf libtool" + + case "$1" in + install) + apk --update --no-cache --no-progress add $deps $makedeps + ;; + remove-makedeps) + apk del $makedeps + ;; + esac +} + +do_ubuntu() { + local deps="python3 python3-pip openssl tor" + local makedeps="python3-dev gcc g++ libffi-dev make automake autoconf libtool" + + case "$1" in + install) + apt-get update && \ + apt-get install --no-install-recommends -y $deps $makedeps && \ + rm -rf /var/lib/apt/lists/* + ;; + remove-makedeps) + apt-get remove -y $makedeps + ;; + esac +} + +if test -f /etc/os-release ; then + . /etc/os-release +elif test -f /usr/lib/os-release ; then + . 
/usr/lib/os-release +else + echo "No such file: /etc/os-release" > /dev/stderr + exit 1 +fi + +case "$ID" in + ubuntu) do_ubuntu "$@" ;; + alpine) do_alpine "$@" ;; + *) + echo "Unsupported OS ID: $ID" > /dev/stderr + exit 1 +esac diff --git a/src/Config.py b/src/Config.py index 71a725cc..4143285b 100644 --- a/src/Config.py +++ b/src/Config.py @@ -82,6 +82,17 @@ class Config(object): from Crypt import CryptHash access_key_default = CryptHash.random(24, "base64") # Used to allow restrited plugins when multiuser plugin is enabled trackers = [ + # by zeroseed at http://127.0.0.1:43110/19HKdTAeBh5nRiKn791czY7TwRB1QNrf1Q/?:users/1HvNGwHKqhj3ZMEM53tz6jbdqe4LRpanEu:zn:dc17f896-bf3f-4962-bdd4-0a470040c9c5 + "zero://k5w77dozo3hy5zualyhni6vrh73iwfkaofa64abbilwyhhd3wgenbjqd.onion:15441", + "zero://2kcb2fqesyaevc4lntogupa4mkdssth2ypfwczd2ov5a3zo6ytwwbayd.onion:15441", + "zero://my562dxpjropcd5hy3nd5pemsc4aavbiptci5amwxzbelmzgkkuxpvid.onion:15441", + "zero://pn4q2zzt2pw4nk7yidxvsxmydko7dfibuzxdswi6gu6ninjpofvqs2id.onion:15441", + "zero://6i54dd5th73oelv636ivix6sjnwfgk2qsltnyvswagwphub375t3xcad.onion:15441", + "zero://tl74auz4tyqv4bieeclmyoe4uwtoc2dj7fdqv4nc4gl5j2bwg2r26bqd.onion:15441", + "zero://wlxav3szbrdhest4j7dib2vgbrd7uj7u7rnuzg22cxbih7yxyg2hsmid.onion:15441", + "zero://zy7wttvjtsijt5uwmlar4yguvjc2gppzbdj4v6bujng6xwjmkdg7uvqd.onion:15441", + + # ZeroNet 0.7.2 defaults: "zero://boot3rdez4rzn36x.onion:15441", "http://open.acgnxtracker.com:80/announce", # DE "http://tracker.bt4g.com:2095/announce", # Cloudflare @@ -275,10 +286,29 @@ class Config(object): self.parser.add_argument('--size_limit', help='Default site size limit in MB', default=10, type=int, metavar='limit') self.parser.add_argument('--file_size_limit', help='Maximum per file size limit in MB', default=10, type=int, metavar='limit') - self.parser.add_argument('--connected_limit', help='Max connected peer per site', default=8, type=int, metavar='connected_limit') - self.parser.add_argument('--global_connected_limit', help='Max connections', default=512, type=int, metavar='global_connected_limit') + self.parser.add_argument('--connected_limit', help='Max number of connected peers per site. Soft limit.', default=10, type=int, metavar='connected_limit') + self.parser.add_argument('--global_connected_limit', help='Max number of connections. 
Soft limit.', default=512, type=int, metavar='global_connected_limit') self.parser.add_argument('--workers', help='Download workers per site', default=5, type=int, metavar='workers') + self.parser.add_argument('--site_announce_interval_min', help='Site announce interval for the most active sites, in minutes.', default=4, type=int, metavar='site_announce_interval_min') + self.parser.add_argument('--site_announce_interval_max', help='Site announce interval for inactive sites, in minutes.', default=30, type=int, metavar='site_announce_interval_max') + + self.parser.add_argument('--site_peer_check_interval_min', help='Connectable peers check interval for the most active sites, in minutes.', default=5, type=int, metavar='site_peer_check_interval_min') + self.parser.add_argument('--site_peer_check_interval_max', help='Connectable peers check interval for inactive sites, in minutes.', default=20, type=int, metavar='site_peer_check_interval_max') + + self.parser.add_argument('--site_update_check_interval_min', help='Site update check interval for the most active sites, in minutes.', default=5, type=int, metavar='site_update_check_interval_min') + self.parser.add_argument('--site_update_check_interval_max', help='Site update check interval for inactive sites, in minutes.', default=45, type=int, metavar='site_update_check_interval_max') + + self.parser.add_argument('--site_connectable_peer_count_max', help='Search for as many connectable peers for the most active sites', default=10, type=int, metavar='site_connectable_peer_count_max') + self.parser.add_argument('--site_connectable_peer_count_min', help='Search for as many connectable peers for inactive sites', default=2, type=int, metavar='site_connectable_peer_count_min') + + self.parser.add_argument('--send_back_lru_size', help='Size of the send back LRU cache', default=5000, type=int, metavar='send_back_lru_size') + self.parser.add_argument('--send_back_limit', help='Send no more than so many files at once back to peer, when we discovered that the peer held older file versions', default=3, type=int, metavar='send_back_limit') + + self.parser.add_argument('--expose_no_ownership', help='By default, ZeroNet tries checking updates for own sites more frequently. This can be used by a third party for revealing the network addresses of a site owner. 
If this option is enabled, ZeroNet performs the checks in the same way for any sites.', type='bool', choices=[True, False], default=False) + + self.parser.add_argument('--simultaneous_connection_throttle_threshold', help='Throttle opening new connections when the number of outgoing connections in not fully established state exceeds the threshold.', default=15, type=int, metavar='simultaneous_connection_throttle_threshold') + self.parser.add_argument('--fileserver_ip', help='FileServer bind address', default="*", metavar='ip') self.parser.add_argument('--fileserver_port', help='FileServer bind port (0: randomize)', default=0, type=int, metavar='port') self.parser.add_argument('--fileserver_port_range', help='FileServer randomization range', default="10000-40000", metavar='port') diff --git a/src/Connection/Connection.py b/src/Connection/Connection.py index 22bcf29c..ad1312f2 100644 --- a/src/Connection/Connection.py +++ b/src/Connection/Connection.py @@ -17,12 +17,13 @@ from util import helper class Connection(object): __slots__ = ( "sock", "sock_wrapped", "ip", "port", "cert_pin", "target_onion", "id", "protocol", "type", "server", "unpacker", "unpacker_bytes", "req_id", "ip_type", - "handshake", "crypt", "connected", "event_connected", "closed", "start_time", "handshake_time", "last_recv_time", "is_private_ip", "is_tracker_connection", + "handshake", "crypt", "connected", "connecting", "event_connected", "closed", "start_time", "handshake_time", "last_recv_time", "is_private_ip", "is_tracker_connection", "last_message_time", "last_send_time", "last_sent_time", "incomplete_buff_recv", "bytes_recv", "bytes_sent", "cpu_time", "send_lock", "last_ping_delay", "last_req_time", "last_cmd_sent", "last_cmd_recv", "bad_actions", "sites", "name", "waiting_requests", "waiting_streams" ) def __init__(self, server, ip, port, sock=None, target_onion=None, is_tracker_connection=False): + self.server = server self.sock = sock self.cert_pin = None if "#" in ip: @@ -42,7 +43,6 @@ class Connection(object): self.is_private_ip = False self.is_tracker_connection = is_tracker_connection - self.server = server self.unpacker = None # Stream incoming socket messages here self.unpacker_bytes = 0 # How many bytes the unpacker received self.req_id = 0 # Last request id @@ -50,6 +50,7 @@ class Connection(object): self.crypt = None # Connection encryption method self.sock_wrapped = False # Socket wrapped to encryption + self.connecting = False self.connected = False self.event_connected = gevent.event.AsyncResult() # Solves on handshake received self.closed = False @@ -81,11 +82,11 @@ class Connection(object): def setIp(self, ip): self.ip = ip - self.ip_type = helper.getIpType(ip) + self.ip_type = self.server.getIpType(ip) self.updateName() def createSocket(self): - if helper.getIpType(self.ip) == "ipv6" and not hasattr(socket, "socket_noproxy"): + if self.server.getIpType(self.ip) == "ipv6" and not hasattr(socket, "socket_noproxy"): # Create IPv6 connection as IPv4 when using proxy return socket.socket(socket.AF_INET6, socket.SOCK_STREAM) else: @@ -118,13 +119,28 @@ class Connection(object): # Open connection to peer and wait for handshake def connect(self): + self.connecting = True + try: + return self._connect() + except Exception as err: + self.connecting = False + self.connected = False + raise + + def _connect(self): + self.updateOnlineStatus(outgoing_activity=True) + + if not self.event_connected or self.event_connected.ready(): + self.event_connected = gevent.event.AsyncResult() + self.type = "out" + + 
unreachability = self.server.getIpUnreachability(self.ip) + if unreachability: + raise Exception(unreachability) + if self.ip_type == "onion": - if not self.server.tor_manager or not self.server.tor_manager.enabled: - raise Exception("Can't connect to onion addresses, no Tor controller present") self.sock = self.server.tor_manager.createSocket(self.ip, self.port) - elif config.tor == "always" and helper.isPrivateIp(self.ip) and self.ip not in config.ip_local: - raise Exception("Can't connect to local IPs in Tor: always mode") elif config.trackers_proxy != "disable" and config.tor != "always" and self.is_tracker_connection: if config.trackers_proxy == "tor": self.sock = self.server.tor_manager.createSocket(self.ip, self.port) @@ -148,37 +164,56 @@ class Connection(object): self.sock.connect(sock_address) - # Implicit SSL - should_encrypt = not self.ip_type == "onion" and self.ip not in self.server.broken_ssl_ips and self.ip not in config.ip_local - if self.cert_pin: - self.sock = CryptConnection.manager.wrapSocket(self.sock, "tls-rsa", cert_pin=self.cert_pin) - self.sock.do_handshake() - self.crypt = "tls-rsa" - self.sock_wrapped = True - elif should_encrypt and "tls-rsa" in CryptConnection.manager.crypt_supported: + if self.shouldEncrypt(): try: - self.sock = CryptConnection.manager.wrapSocket(self.sock, "tls-rsa") - self.sock.do_handshake() - self.crypt = "tls-rsa" - self.sock_wrapped = True + self.wrapSocket() except Exception as err: - if not config.force_encryption: - self.log("Crypt connection error, adding %s:%s as broken ssl. %s" % (self.ip, self.port, Debug.formatException(err))) - self.server.broken_ssl_ips[self.ip] = True - self.sock.close() - self.crypt = None - self.sock = self.createSocket() - self.sock.settimeout(30) - self.sock.connect(sock_address) + if self.sock: + self.sock.close() + self.sock = None + if self.mustEncrypt(): + raise + self.log("Crypt connection error, adding %s:%s as broken ssl. 
%s" % (self.ip, self.port, Debug.formatException(err))) + self.server.broken_ssl_ips[self.ip] = True + return self.connect() # Detect protocol - self.send({"cmd": "handshake", "req_id": 0, "params": self.getHandshakeInfo()}) event_connected = self.event_connected - gevent.spawn(self.messageLoop) + self.send({"cmd": "handshake", "req_id": 0, "params": self.getHandshakeInfo()}) + self.server.outgoing_pool.spawn(self.messageLoop) connect_res = event_connected.get() # Wait for handshake - self.sock.settimeout(timeout_before) + if self.sock: + self.sock.settimeout(timeout_before) return connect_res + def mustEncrypt(self): + if self.cert_pin: + return True + if (not self.ip_type == "onion") and config.force_encryption: + return True + return False + + def shouldEncrypt(self): + if self.mustEncrypt(): + return True + return ( + (not self.ip_type == "onion") + and + (self.ip not in self.server.broken_ssl_ips) + and + (self.ip not in config.ip_local) + and + ("tls-rsa" in CryptConnection.manager.crypt_supported) + ) + + def wrapSocket(self, crypt="tls-rsa", do_handshake=True): + server = (self.type == "in") + sock = CryptConnection.manager.wrapSocket(self.sock, crypt, server=server, cert_pin=self.cert_pin) + sock.do_handshake() + self.crypt = crypt + self.sock_wrapped = True + self.sock = sock + # Handle incoming connection def handleIncomingConnection(self, sock): self.log("Incoming connection...") @@ -192,9 +227,7 @@ class Connection(object): first_byte = sock.recv(1, gevent.socket.MSG_PEEK) if first_byte == b"\x16": self.log("Crypt in connection using implicit SSL") - self.sock = CryptConnection.manager.wrapSocket(self.sock, "tls-rsa", True) - self.sock_wrapped = True - self.crypt = "tls-rsa" + self.wrapSocket(do_handshake=False) except Exception as err: self.log("Socket peek error: %s" % Debug.formatException(err)) self.messageLoop() @@ -213,6 +246,7 @@ class Connection(object): self.protocol = "v2" self.updateName() self.connected = True + self.connecting = False buff_len = 0 req_len = 0 self.unpacker_bytes = 0 @@ -435,13 +469,13 @@ class Connection(object): self.updateName() self.event_connected.set(True) # Mark handshake as done - self.event_connected = None self.handshake_time = time.time() # Handle incoming message def handleMessage(self, message): cmd = message["cmd"] + self.updateOnlineStatus(successful_activity=True) self.last_message_time = time.time() self.last_cmd_recv = cmd if cmd == "response": # New style response @@ -458,12 +492,10 @@ class Connection(object): self.last_ping_delay = ping # Server switched to crypt, lets do it also if not crypted already if message.get("crypt") and not self.sock_wrapped: - self.crypt = message["crypt"] + crypt = message["crypt"] server = (self.type == "in") - self.log("Crypt out connection using: %s (server side: %s, ping: %.3fs)..." % (self.crypt, server, ping)) - self.sock = CryptConnection.manager.wrapSocket(self.sock, self.crypt, server, cert_pin=self.cert_pin) - self.sock.do_handshake() - self.sock_wrapped = True + self.log("Crypt out connection using: %s (server side: %s, ping: %.3fs)..." % (crypt, server, ping)) + self.wrapSocket(crypt) if not self.sock_wrapped and self.cert_pin: self.close("Crypt connection error: Socket not encrypted, but certificate pin present") @@ -491,8 +523,7 @@ class Connection(object): server = (self.type == "in") self.log("Crypt in connection using: %s (server side: %s)..." 
% (self.crypt, server)) try: - self.sock = CryptConnection.manager.wrapSocket(self.sock, self.crypt, server, cert_pin=self.cert_pin) - self.sock_wrapped = True + self.wrapSocket(self.crypt) except Exception as err: if not config.force_encryption: self.log("Crypt connection error, adding %s:%s as broken ssl. %s" % (self.ip, self.port, Debug.formatException(err))) @@ -504,6 +535,7 @@ class Connection(object): # Send data to connection def send(self, message, streaming=False): + self.updateOnlineStatus(outgoing_activity=True) self.last_send_time = time.time() if config.debug_socket: self.log("Send: %s, to: %s, streaming: %s, site: %s, inner_path: %s, req_id: %s" % ( @@ -543,6 +575,11 @@ class Connection(object): message = None with self.send_lock: self.sock.sendall(data) + # XXX: Should not be used here: + # self.updateOnlineStatus(successful_activity=True) + # Looks like self.sock.sendall() returns normally, instead of + # raising an Exception (at least, some times). + # So the only way of detecting the network activity is self.handleMessage() except Exception as err: self.close("Send error: %s (cmd: %s)" % (err, stat_key)) return False @@ -584,7 +621,8 @@ class Connection(object): self.waiting_requests[self.req_id] = {"evt": event, "cmd": cmd} if stream_to: self.waiting_streams[self.req_id] = stream_to - self.send(data) # Send request + if not self.send(data): # Send request + return False res = event.get() # Wait until event solves return res @@ -608,6 +646,7 @@ class Connection(object): return False # Already closed self.closed = True self.connected = False + self.connecting = False if self.event_connected: self.event_connected.set(False) @@ -633,3 +672,12 @@ class Connection(object): self.sock = None self.unpacker = None self.event_connected = None + self.crypt = None + self.sock_wrapped = False + + return True + + def updateOnlineStatus(self, outgoing_activity=False, successful_activity=False): + self.server.updateOnlineStatus(self, + outgoing_activity=outgoing_activity, + successful_activity=successful_activity) diff --git a/src/Connection/ConnectionServer.py b/src/Connection/ConnectionServer.py index 090d96a6..5f5e7a29 100644 --- a/src/Connection/ConnectionServer.py +++ b/src/Connection/ConnectionServer.py @@ -1,4 +1,5 @@ import logging +import re import time import sys import socket @@ -8,6 +9,7 @@ import gevent import msgpack from gevent.server import StreamServer from gevent.pool import Pool +import gevent.event import util from util import helper @@ -36,20 +38,32 @@ class ConnectionServer(object): self.port_opened = {} self.peer_blacklist = SiteManager.peer_blacklist + self.managed_pools = {} + self.tor_manager = TorManager(self.ip, self.port) self.connections = [] # Connections self.whitelist = config.ip_local # No flood protection on this ips self.ip_incoming = {} # Incoming connections from ip in the last minute to avoid connection flood self.broken_ssl_ips = {} # Peerids of broken ssl connections self.ips = {} # Connection by ip + self.has_internet = True # Internet outage detection + self.internet_online_since = 0 + self.internet_offline_since = 0 + self.last_outgoing_internet_activity_time = 0 # Last time the application tried to send any data + self.last_successful_internet_activity_time = 0 # Last time the application successfully sent or received any data + self.internet_outage_threshold = 60 * 2 self.stream_server = None self.stream_server_proxy = None self.running = False self.stopping = False + self.stopping_event = gevent.event.Event() self.thread_checker = None + 
self.thread_pool = Pool(None) + self.managed_pools["thread"] = self.thread_pool + self.stat_recv = defaultdict(lambda: defaultdict(int)) self.stat_sent = defaultdict(lambda: defaultdict(int)) self.bytes_recv = 0 @@ -61,8 +75,14 @@ class ConnectionServer(object): self.num_outgoing = 0 self.had_external_incoming = False + + self.timecorrection = 0.0 self.pool = Pool(500) # do not accept more than 500 connections + self.managed_pools["incoming"] = self.pool + + self.outgoing_pool = Pool(None) + self.managed_pools["outgoing"] = self.outgoing_pool # Bittorrent style peerid self.peer_id = "-UT3530-%s" % CryptHash.random(12, "base64") @@ -83,10 +103,11 @@ class ConnectionServer(object): return False self.running = True if check_connections: - self.thread_checker = gevent.spawn(self.checkConnections) + self.thread_checker = self.spawn(self.checkConnections) CryptConnection.manager.loadCerts() if config.tor != "disable": self.tor_manager.start() + self.tor_manager.startOnions() if not self.port: self.log.info("No port found, not binding") return False @@ -107,7 +128,7 @@ class ConnectionServer(object): return None if self.stream_server_proxy: - gevent.spawn(self.listenProxy) + self.spawn(self.listenProxy) try: self.stream_server.serve_forever() except Exception as err: @@ -115,22 +136,92 @@ class ConnectionServer(object): return False self.log.debug("Stopped.") - def stop(self): + def stop(self, ui_websocket=None): self.log.debug("Stopping %s" % self.stream_server) self.stopping = True self.running = False + self.stopping_event.set() + self.onStop(ui_websocket=ui_websocket) + + def onStop(self, ui_websocket=None): + timeout = 30 + start_time = time.time() + join_quantum = 0.1 + prev_msg = None + while True: + if time.time() >= start_time + timeout: + break + + total_size = 0 + sizes = {} + timestep = 0 + for name, pool in list(self.managed_pools.items()): + timestep += join_quantum + pool.join(timeout=join_quantum) + size = len(pool) + if size: + sizes[name] = size + total_size += size + + if len(sizes) == 0: + break + + if timestep < 1: + time.sleep(1 - timestep) + + # format message + s = "" + for name, size in sizes.items(): + s += "%s pool: %s, " % (name, size) + msg = "Waiting for tasks in managed pools to stop: %s" % s + # Prevent flooding to log + if msg != prev_msg: + prev_msg = msg + self.log.info("%s", msg) + + percent = 100 * (time.time() - start_time) / timeout + msg = "File Server: waiting for %s tasks to stop" % total_size + self.sendShutdownProgress(ui_websocket, msg, percent) + + for name, pool in list(self.managed_pools.items()): + size = len(pool) + if size: + self.log.info("Killing %s tasks in %s pool", size, name) + pool.kill() + + self.sendShutdownProgress(ui_websocket, "File Server stopped. 
Now to exit.", 100) + if self.thread_checker: gevent.kill(self.thread_checker) + self.thread_checker = None if self.stream_server: self.stream_server.stop() + def sendShutdownProgress(self, ui_websocket, message, progress): + if not ui_websocket: + return + ui_websocket.cmd("progress", ["shutdown", message, progress]) + time.sleep(0.01) + + # Sleeps the specified amount of time or until ConnectionServer is stopped + def sleep(self, t): + if t: + self.stopping_event.wait(timeout=t) + else: + time.sleep(t) + + # Spawns a thread that will be waited for on server being stopped (and killed after a timeout) + def spawn(self, *args, **kwargs): + thread = self.thread_pool.spawn(*args, **kwargs) + return thread + def closeConnections(self): self.log.debug("Closing all connection: %s" % len(self.connections)) for connection in self.connections[:]: connection.close("Close all connections") def handleIncomingConnection(self, sock, addr): - if config.offline: + if not self.allowsAcceptingConnections(): sock.close() return False @@ -148,7 +239,7 @@ class ConnectionServer(object): self.ip_incoming[ip] += 1 if self.ip_incoming[ip] > 6: # Allow 6 in 1 minute from same ip self.log.debug("Connection flood detected from %s" % ip) - time.sleep(30) + self.sleep(30) sock.close() return False else: @@ -167,7 +258,7 @@ class ConnectionServer(object): pass def getConnection(self, ip=None, port=None, peer_id=None, create=True, site=None, is_tracker_connection=False): - ip_type = helper.getIpType(ip) + ip_type = self.getIpType(ip) has_per_site_onion = (ip.endswith(".onion") or self.port_opened.get(ip_type, None) == False) and self.tor_manager.start_onions and site if has_per_site_onion: # Site-unique connection for Tor if ip.endswith(".onion"): @@ -203,7 +294,7 @@ class ConnectionServer(object): return connection # No connection found - if create and not config.offline: # Allow to create new connection if not found + if create and self.allowsCreatingConnections(): if port == 0: raise Exception("This peer is not connectable") @@ -211,6 +302,7 @@ class ConnectionServer(object): raise Exception("This peer is blacklisted") try: + #self.log.info("Connection to: %s:%s", ip, port) if has_per_site_onion: # Lock connection to site connection = Connection(self, ip, port, target_onion=site_onion, is_tracker_connection=is_tracker_connection) else: @@ -229,11 +321,12 @@ class ConnectionServer(object): self.last_connection_id_current_version += 1 except Exception as err: + #self.log.info("Connection error (%s, %s): %s", ip, port, Debug.formatException(err)) connection.close("%s Connect error: %s" % (ip, Debug.formatException(err))) raise err if len(self.connections) > config.global_connected_limit: - gevent.spawn(self.checkMaxConnections) + self.spawn(self.checkMaxConnections) return connection else: @@ -256,12 +349,12 @@ class ConnectionServer(object): def checkConnections(self): run_i = 0 - time.sleep(15) + self.sleep(15) while self.running: run_i += 1 self.ip_incoming = {} # Reset connected ips counter - last_message_time = 0 s = time.time() + self.updateOnlineStatus(None) for connection in self.connections[:]: # Make a copy if connection.ip.endswith(".onion") or config.tor == "always": timeout_multipler = 2 @@ -269,9 +362,6 @@ class ConnectionServer(object): timeout_multipler = 1 idle = time.time() - max(connection.last_recv_time, connection.start_time, connection.last_message_time) - if connection.last_message_time > last_message_time and not connection.is_private_ip: - # Message from local IPs does not means internet 
connection - last_message_time = connection.last_message_time if connection.unpacker and idle > 30: # Delete the unpacker if not needed @@ -319,24 +409,12 @@ class ConnectionServer(object): # Reset bad action counter every 30 min connection.bad_actions = 0 - # Internet outage detection - if time.time() - last_message_time > max(60, 60 * 10 / max(1, float(len(self.connections)) / 50)): - # Offline: Last message more than 60-600sec depending on connection number - if self.has_internet and last_message_time: - self.has_internet = False - self.onInternetOffline() - else: - # Online - if not self.has_internet: - self.has_internet = True - self.onInternetOnline() - self.timecorrection = self.getTimecorrection() if time.time() - s > 0.01: self.log.debug("Connection cleanup in %.3fs" % (time.time() - s)) - time.sleep(15) + self.sleep(15) self.log.debug("Checkconnections ended") @util.Noparallel(blocking=False) @@ -361,6 +439,68 @@ class ConnectionServer(object): )) return num_closed + # Returns True if we should slow down opening new connections as at the moment + # there are too many connections being established and not connected completely + # (not entered the message loop yet). + def shouldThrottleNewConnections(self): + threshold = config.simultaneous_connection_throttle_threshold + if len(self.connections) <= threshold: + return False + nr_connections_being_established = 0 + for connection in self.connections[:]: # Make a copy + if connection.connecting and not connection.connected and connection.type == "out": + nr_connections_being_established += 1 + if nr_connections_being_established > threshold: + return True + return False + + # Internet outage detection + def updateOnlineStatus(self, connection, outgoing_activity=False, successful_activity=False): + + now = time.time() + + if connection and not connection.is_private_ip: + if outgoing_activity: + self.last_outgoing_internet_activity_time = now + if successful_activity: + self.last_successful_internet_activity_time = now + self.setInternetStatus(True) + return + + if not self.last_outgoing_internet_activity_time: + return + + if ( + (self.last_successful_internet_activity_time < now - self.internet_outage_threshold) + and + (self.last_successful_internet_activity_time < self.last_outgoing_internet_activity_time) + ): + self.setInternetStatus(False) + return + + # This is the old algorithm just in case we missed something + idle = now - self.last_successful_internet_activity_time + if idle > max(60, 60 * 10 / max(1, float(len(self.connections)) / 50)): + # Offline: Last successful activity more than 60-600sec depending on connection number + self.setInternetStatus(False) + return + + def setInternetStatus(self, status): + if self.has_internet == status: + return + + self.has_internet = status + + if self.has_internet: + self.internet_online_since = time.time() + self.spawn(self.onInternetOnline) + else: + self.internet_offline_since = time.time() + self.spawn(self.onInternetOffline) + + def isInternetOnline(self): + return self.has_internet + def onInternetOnline(self): self.log.info("Internet online") @@ -368,6 +508,32 @@ class ConnectionServer(object): self.had_external_incoming = False self.log.info("Internet offline") + def setOfflineMode(self, offline_mode): + if config.offline == offline_mode: + return + config.offline = offline_mode # Yep, awkward + if offline_mode: + self.log.info("offline mode is ON") + else: + self.log.info("offline mode is OFF") + + def isOfflineMode(self): + return config.offline + + def 
allowsCreatingConnections(self): + if self.isOfflineMode(): + return False + if self.stopping: + return False + return True + + def allowsAcceptingConnections(self): + if self.isOfflineMode(): + return False + if self.stopping: + return False + return True + def getTimecorrection(self): corrections = sorted([ connection.handshake.get("time") - connection.handshake_time + connection.last_ping_delay @@ -379,3 +545,48 @@ mid = int(len(corrections) / 2 - 1) median = (corrections[mid - 1] + corrections[mid] + corrections[mid + 1]) / 3 return median + + + ############################################################################ + + # Methods for handling network address types + # (ipv4, ipv6, onion etc... more to be implemented by plugins) + # + # All the functions handling network address types have "Ip" in the name. + # So it was in the initial codebase, and I keep the naming, since I couldn't + # think of a better option. + # "IP" is short and quite clear and lets you understand that a variable + # contains a peer address or other transport-level address and not + # an address of a ZeroNet site. + # + + # Returns the type of the given network address. + # Since: 0.8.0 + # Replaces helper.getIpType() in order to be extensible by plugins. + def getIpType(self, ip): + if ip.endswith(".onion"): + return "onion" + elif ":" in ip: + return "ipv6" + elif re.match(r"[0-9\.]+$", ip): + return "ipv4" + else: + return "unknown" + + # Checks whether a network address is reachable in the current configuration + # and returns a string describing why it is not. + # If the network address is reachable, returns False. + # Since: 0.8.0 + def getIpUnreachability(self, ip): + ip_type = self.getIpType(ip) + if ip_type == 'onion' and not self.tor_manager.enabled: + return "Can't connect to onion addresses, no Tor controller present" + if config.tor == "always" and helper.isPrivateIp(ip) and ip not in config.ip_local: + return "Can't connect to local IPs in Tor: always mode" + return False + + # Returns True if ConnectionServer has means for establishing outgoing + # connections to the given address.
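+    # Illustrative note: Connection._connect() consumes getIpUnreachability()
+    # directly so that it can raise with the reason string, while isIpReachable()
+    # below is the boolean convenience form, e.g. in a hypothetical plugin:
+    #     if not connection_server.isIpReachable(peer_ip):
+    #         return  # no usable transport for this address type
+    # ("connection_server" and "peer_ip" are placeholder names.)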
+ # Since: 0.8.0 + def isIpReachable(self, ip): + return self.getIpUnreachability(ip) == False diff --git a/src/Content/ContentManager.py b/src/Content/ContentManager.py index 27da402b..c6a64750 100644 --- a/src/Content/ContentManager.py +++ b/src/Content/ContentManager.py @@ -239,7 +239,7 @@ class ContentManager(object): if num_removed_bad_files > 0: self.site.worker_manager.removeSolvedFileTasks(mark_as_good=False) - gevent.spawn(self.site.update, since=0) + self.site.spawn(self.site.update, since=0) self.log.debug("Archived removed contents: %s, removed bad files: %s" % (num_removed_contents, num_removed_bad_files)) @@ -651,6 +651,25 @@ class ContentManager(object): ) return files_node, files_optional_node + def serializeForSigning(self, content): + if "sign" in content: + del(content["sign"]) # The file signed without the sign + if "signs" in content: + del(content["signs"]) # The file signed without the signs + + sign_content = json.dumps(content, sort_keys=True) # Dump the json to string to remove whitespaces + + # Fix float representation error on Android + modified = content["modified"] + if config.fix_float_decimals and type(modified) is float and not str(modified).endswith(".0"): + modified_fixed = "{:.6f}".format(modified).strip("0.") + sign_content = sign_content.replace( + '"modified": %s' % repr(modified), + '"modified": %s' % modified_fixed + ) + + return sign_content + # Create and sign a content.json # Return: The new content if filewrite = False def sign(self, inner_path="content.json", privatekey=None, filewrite=True, update_changed_files=False, extend=None, remove_missing_optional=False): @@ -756,12 +775,7 @@ class ContentManager(object): self.log.info("Signing %s..." % inner_path) - if "signs" in new_content: - del(new_content["signs"]) # Delete old signs - if "sign" in new_content: - del(new_content["sign"]) # Delete old sign (backward compatibility) - - sign_content = json.dumps(new_content, sort_keys=True) + sign_content = self.serializeForSigning(new_content) sign = CryptBitcoin.sign(sign_content, privatekey) # new_content["signs"] = content.get("signs", {}) # TODO: Multisig if sign: # If signing is successful (not an old address) @@ -925,102 +939,95 @@ class ContentManager(object): return True # All good + def verifyContentFile(self, inner_path, file, ignore_same=True): + from Crypt import CryptBitcoin + + if type(file) is dict: + new_content = file + else: + try: + if sys.version_info.major == 3 and sys.version_info.minor < 6: + new_content = json.loads(file.read().decode("utf8")) + else: + new_content = json.load(file) + except Exception as err: + raise VerifyError("Invalid json file: %s" % err) + if inner_path in self.contents: + old_content = self.contents.get(inner_path, {"modified": 0}) + # Checks if its newer the ours + if old_content["modified"] == new_content["modified"] and ignore_same: # Ignore, have the same content.json + return None + elif old_content["modified"] > new_content["modified"]: # We have newer + raise VerifyError( + "We have newer (Our: %s, Sent: %s)" % + (old_content["modified"], new_content["modified"]) + ) + if new_content["modified"] > time.time() + 60 * 60 * 24: # Content modified in the far future (allow 1 day+) + raise VerifyError("Modify timestamp is in the far future!") + if self.isArchived(inner_path, new_content["modified"]): + if inner_path in self.site.bad_files: + del self.site.bad_files[inner_path] + raise VerifyError("This file is archived!") + # Check sign + sign = new_content.get("sign") + signs = 
new_content.get("signs", {}) + sign_content = self.serializeForSigning(new_content) + + if signs: # New style signing + valid_signers = self.getValidSigners(inner_path, new_content) + signs_required = self.getSignsRequired(inner_path, new_content) + + if inner_path == "content.json" and len(valid_signers) > 1: # Check signers_sign on root content.json + signers_data = "%s:%s" % (signs_required, ",".join(valid_signers)) + if not CryptBitcoin.verify(signers_data, self.site.address, new_content["signers_sign"]): + raise VerifyError("Invalid signers_sign!") + + if inner_path != "content.json" and not self.verifyCert(inner_path, new_content): # Check if cert valid + raise VerifyError("Invalid cert!") + + valid_signs = 0 + for address in valid_signers: + if address in signs: + valid_signs += CryptBitcoin.verify(sign_content, address, signs[address]) + if valid_signs >= signs_required: + break # Break if we have enough signs + if valid_signs < signs_required: + raise VerifyError("Valid signs: %s/%s" % (valid_signs, signs_required)) + else: + return self.verifyContent(inner_path, new_content) + elif sign: # Old style signing + raise VerifyError("Invalid old-style sign") + else: + raise VerifyError("Not signed") + + def verifyOrdinaryFile(self, inner_path, file, ignore_same=True): + file_info = self.getFileInfo(inner_path) + if file_info: + if CryptHash.sha512sum(file) != file_info.get("sha512", ""): + raise VerifyError("Invalid hash") + + if file_info.get("size", 0) != file.tell(): + raise VerifyError( + "File size does not match %s: %s <> %s" % + (inner_path, file.tell(), file_info.get("size", 0)) + ) + + return True + + else: # File not in content.json + raise VerifyError("File not in content.json") + # Verify file validity # Return: None = Same as before, False = Invalid, True = Valid def verifyFile(self, inner_path, file, ignore_same=True): - if inner_path.endswith("content.json"): # content.json: Check using sign - from Crypt import CryptBitcoin - try: - if type(file) is dict: - new_content = file - else: - try: - if sys.version_info.major == 3 and sys.version_info.minor < 6: - new_content = json.loads(file.read().decode("utf8")) - else: - new_content = json.load(file) - except Exception as err: - raise VerifyError("Invalid json file: %s" % err) - if inner_path in self.contents: - old_content = self.contents.get(inner_path, {"modified": 0}) - # Checks if its newer the ours - if old_content["modified"] == new_content["modified"] and ignore_same: # Ignore, have the same content.json - return None - elif old_content["modified"] > new_content["modified"]: # We have newer - raise VerifyError( - "We have newer (Our: %s, Sent: %s)" % - (old_content["modified"], new_content["modified"]) - ) - if new_content["modified"] > time.time() + 60 * 60 * 24: # Content modified in the far future (allow 1 day+) - raise VerifyError("Modify timestamp is in the far future!") - if self.isArchived(inner_path, new_content["modified"]): - if inner_path in self.site.bad_files: - del self.site.bad_files[inner_path] - raise VerifyError("This file is archived!") - # Check sign - sign = new_content.get("sign") - signs = new_content.get("signs", {}) - if "sign" in new_content: - del(new_content["sign"]) # The file signed without the sign - if "signs" in new_content: - del(new_content["signs"]) # The file signed without the signs - - sign_content = json.dumps(new_content, sort_keys=True) # Dump the json to string to remove whitepsace - - # Fix float representation error on Android - modified = new_content["modified"] - if
config.fix_float_decimals and type(modified) is float and not str(modified).endswith(".0"): - modified_fixed = "{:.6f}".format(modified).strip("0.") - sign_content = sign_content.replace( - '"modified": %s' % repr(modified), - '"modified": %s' % modified_fixed - ) - - if signs: # New style signing - valid_signers = self.getValidSigners(inner_path, new_content) - signs_required = self.getSignsRequired(inner_path, new_content) - - if inner_path == "content.json" and len(valid_signers) > 1: # Check signers_sign on root content.json - signers_data = "%s:%s" % (signs_required, ",".join(valid_signers)) - if not CryptBitcoin.verify(signers_data, self.site.address, new_content["signers_sign"]): - raise VerifyError("Invalid signers_sign!") - - if inner_path != "content.json" and not self.verifyCert(inner_path, new_content): # Check if cert valid - raise VerifyError("Invalid cert!") - - valid_signs = 0 - for address in valid_signers: - if address in signs: - valid_signs += CryptBitcoin.verify(sign_content, address, signs[address]) - if valid_signs >= signs_required: - break # Break if we has enough signs - if valid_signs < signs_required: - raise VerifyError("Valid signs: %s/%s" % (valid_signs, signs_required)) - else: - return self.verifyContent(inner_path, new_content) - else: # Old style signing - raise VerifyError("Invalid old-style sign") - - except Exception as err: - self.log.warning("%s: verify sign error: %s" % (inner_path, Debug.formatException(err))) - raise err - - else: # Check using sha512 hash - file_info = self.getFileInfo(inner_path) - if file_info: - if CryptHash.sha512sum(file) != file_info.get("sha512", ""): - raise VerifyError("Invalid hash") - - if file_info.get("size", 0) != file.tell(): - raise VerifyError( - "File size does not match %s <> %s" % - (inner_path, file.tell(), file_info.get("size", 0)) - ) - - return True - - else: # File not in content.json - raise VerifyError("File not in content.json") + try: + if inner_path.endswith("content.json"): + return self.verifyContentFile(inner_path, file, ignore_same) + else: + return self.verifyOrdinaryFile(inner_path, file, ignore_same) + except Exception as err: + self.log.info("%s: verify error: %s" % (inner_path, Debug.formatException(err))) + raise err def optionalDelete(self, inner_path): self.site.storage.delete(inner_path) diff --git a/src/Crypt/CryptEd25519.py b/src/Crypt/CryptEd25519.py new file mode 100644 index 00000000..fc05a932 --- /dev/null +++ b/src/Crypt/CryptEd25519.py @@ -0,0 +1,340 @@ +## ZeroNet onion V3 support +## The following copied code is copied from stem.util.ed25519 official Tor Project python3 lib +## url : https://gitweb.torproject.org/stem.git/tree/stem/util/ed25519.py +## the ##modified tag means that the function has been modified respect to the one used by stem lib +## the ##custom tag means that the function has been added by me and it's not present on the stem ed25519.py file +## every comment i make begins with ## +## +# The following is copied from... +# +# https://github.com/pyca/ed25519 +# +# This is under the CC0 license. For more information please see... +# +# https://github.com/pyca/cryptography/issues/5068 + +# ed25519.py - Optimized version of the reference implementation of Ed25519 +# +# Written in 2011? by Daniel J. 
Bernstein <djb@cr.yp.to> +# 2013 by Donald Stufft <donald@stufft.io> +# 2013 by Alex Gaynor <alex.gaynor@gmail.com> +# 2013 by Greg Price <price@mit.edu> +# +# To the extent possible under law, the author(s) have dedicated all copyright +# and related and neighboring rights to this software to the public domain +# worldwide. This software is distributed without any warranty. +# +# You should have received a copy of the CC0 Public Domain Dedication along +# with this software. If not, see +# <http://creativecommons.org/publicdomain/zero/1.0/>. + +""" +NB: This code is not safe for use with secret keys or secret data. +The only safe use of this code is for verifying signatures on public messages. + +Functions for computing the public key of a secret key and for signing +a message are included, namely publickey_unsafe and signature_unsafe, +for testing purposes only. + +The root of the problem is that Python's long-integer arithmetic is +not designed for use in cryptography. Specifically, it may take more +or less time to execute an operation depending on the values of the +inputs, and its memory access patterns may also depend on the inputs. +This opens it to timing and cache side-channel attacks which can +disclose data to an attacker. We rely on Python's long-integer +arithmetic, so we cannot handle secrets without risking their disclosure. +""" + +import hashlib +import operator +import sys +import base64 + + +__version__ = "1.0.dev0" + + +# Useful for very coarse version differentiation. +PY3 = sys.version_info[0] == 3 + +if PY3: + indexbytes = operator.getitem + intlist2bytes = bytes + int2byte = operator.methodcaller("to_bytes", 1, "big") +else: + int2byte = chr + range = xrange + + def indexbytes(buf, i): + return ord(buf[i]) + + def intlist2bytes(l): + return b"".join(chr(c) for c in l) + + +b = 256 +q = 2 ** 255 - 19 +l = 2 ** 252 + 27742317777372353535851937790883648493 + + +def H(m): + return hashlib.sha512(m).digest() + + +def pow2(x, p): + """== pow(x, 2**p, q)""" + while p > 0: + x = x * x % q + p -= 1 + return x + + +def inv(z): + """$= z^{-1} \mod q$, for z != 0""" + # Adapted from curve25519_athlon.c in djb's Curve25519. + z2 = z * z % q # 2 + z9 = pow2(z2, 2) * z % q # 9 + z11 = z9 * z2 % q # 11 + z2_5_0 = (z11 * z11) % q * z9 % q # 31 == 2^5 - 2^0 + z2_10_0 = pow2(z2_5_0, 5) * z2_5_0 % q # 2^10 - 2^0 + z2_20_0 = pow2(z2_10_0, 10) * z2_10_0 % q # ... 
+ z2_40_0 = pow2(z2_20_0, 20) * z2_20_0 % q + z2_50_0 = pow2(z2_40_0, 10) * z2_10_0 % q + z2_100_0 = pow2(z2_50_0, 50) * z2_50_0 % q + z2_200_0 = pow2(z2_100_0, 100) * z2_100_0 % q + z2_250_0 = pow2(z2_200_0, 50) * z2_50_0 % q # 2^250 - 2^0 + return pow2(z2_250_0, 5) * z11 % q # 2^255 - 2^5 + 11 = q - 2 + + +d = -121665 * inv(121666) % q +I = pow(2, (q - 1) // 4, q) + + +def xrecover(y): + xx = (y * y - 1) * inv(d * y * y + 1) + x = pow(xx, (q + 3) // 8, q) + + if (x * x - xx) % q != 0: + x = (x * I) % q + + if x % 2 != 0: + x = q-x + + return x + + +By = 4 * inv(5) +Bx = xrecover(By) +B = (Bx % q, By % q, 1, (Bx * By) % q) +ident = (0, 1, 1, 0) + + +def edwards_add(P, Q): + # This is formula sequence 'addition-add-2008-hwcd-3' from + # http://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html + (x1, y1, z1, t1) = P + (x2, y2, z2, t2) = Q + + a = (y1-x1)*(y2-x2) % q + b = (y1+x1)*(y2+x2) % q + c = t1*2*d*t2 % q + dd = z1*2*z2 % q + e = b - a + f = dd - c + g = dd + c + h = b + a + x3 = e*f + y3 = g*h + t3 = e*h + z3 = f*g + + return (x3 % q, y3 % q, z3 % q, t3 % q) + + +def edwards_double(P): + # This is formula sequence 'dbl-2008-hwcd' from + # http://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html + (x1, y1, z1, t1) = P + + a = x1*x1 % q + b = y1*y1 % q + c = 2*z1*z1 % q + # dd = -a + e = ((x1+y1)*(x1+y1) - a - b) % q + g = -a + b # dd + b + f = g - c + h = -a - b # dd - b + x3 = e*f + y3 = g*h + t3 = e*h + z3 = f*g + + return (x3 % q, y3 % q, z3 % q, t3 % q) + + +def scalarmult(P, e): + if e == 0: + return ident + Q = scalarmult(P, e // 2) + Q = edwards_double(Q) + if e & 1: + Q = edwards_add(Q, P) + return Q + + +# Bpow[i] == scalarmult(B, 2**i) +Bpow = [] + + +def make_Bpow(): + P = B + for i in range(253): + Bpow.append(P) + P = edwards_double(P) +make_Bpow() + + +def scalarmult_B(e): + """ + Implements scalarmult(B, e) more efficiently. + """ + # scalarmult(B, l) is the identity + e = e % l + P = ident + for i in range(253): + if e & 1: + P = edwards_add(P, Bpow[i]) + e = e // 2 + assert e == 0, e + return P + + +def encodeint(y): + bits = [(y >> i) & 1 for i in range(b)] + return b''.join([ + int2byte(sum([bits[i * 8 + j] << j for j in range(8)])) + for i in range(b//8) + ]) + + +def encodepoint(P): + (x, y, z, t) = P + zi = inv(z) + x = (x * zi) % q + y = (y * zi) % q + bits = [(y >> i) & 1 for i in range(b - 1)] + [x & 1] + return b''.join([ + int2byte(sum([bits[i * 8 + j] << j for j in range(8)])) + for i in range(b // 8) + ]) + + +def bit(h, i): + return (indexbytes(h, i // 8) >> (i % 8)) & 1 + +##modified +def publickey_unsafe(sk): + """ + Not safe to use with secret keys or secret data. + + See module docstring. This function should be used for testing only. 
+ """ + ##h = H(sk) + h = sk + a = 2 ** (b - 2) + sum(2 ** i * bit(h, i) for i in range(3, b - 2)) + A = scalarmult_B(a) + return encodepoint(A) + +##custom +## from stem.util.str_tools._to_unicode_impl +## from https://gitweb.torproject.org/stem.git/tree/stem/util/str_tools.py#n80 +def to_unicode_impl(msg): + if msg is not None and not isinstance(msg, str): + return msg.decode('utf-8', 'replace') + else: + return msg + +##custom +## rewritten stem.descriptor.hidden_service.address_from_identity_key +## from https://gitweb.torproject.org/stem.git/tree/stem/descriptor/hidden_service.py#n1088 +def publickey_to_onionaddress(key): + CHECKSUM_CONSTANT = b'.onion checksum' + ## version = stem.client.datatype.Size.CHAR.pack(3) + version = b'\x03' + checksum = hashlib.sha3_256(CHECKSUM_CONSTANT + key + version).digest()[:2] + onion_address = base64.b32encode(key + checksum + version) + return to_unicode_impl(onion_address + b'.onion').lower() + + +def Hint(m): + h = H(m) + return sum(2 ** i * bit(h, i) for i in range(2 * b)) + +##modified +def signature_unsafe(m, sk, pk): + """ + Not safe to use with secret keys or secret data. + + See module docstring. This function should be used for testing only. + """ + ##h = H(sk) + h = sk + a = 2 ** (b - 2) + sum(2 ** i * bit(h, i) for i in range(3, b - 2)) + r = Hint( + intlist2bytes([indexbytes(h, j) for j in range(b // 8, b // 4)]) + m + ) + R = scalarmult_B(r) + S = (r + Hint(encodepoint(R) + pk + m) * a) % l + return encodepoint(R) + encodeint(S) + + +def isoncurve(P): + (x, y, z, t) = P + return (z % q != 0 and + x*y % q == z*t % q and + (y*y - x*x - z*z - d*t*t) % q == 0) + + +def decodeint(s): + return sum(2 ** i * bit(s, i) for i in range(0, b)) + + +def decodepoint(s): + y = sum(2 ** i * bit(s, i) for i in range(0, b - 1)) + x = xrecover(y) + if x & 1 != bit(s, b-1): + x = q - x + P = (x, y, 1, (x*y) % q) + if not isoncurve(P): + raise ValueError("decoding point that is not on curve") + return P + + +class SignatureMismatch(Exception): + pass + + +def checkvalid(s, m, pk): + """ + Not safe to use when any argument is secret. + + See module docstring. This function should be used only for + verifying public signatures of public messages. + """ + if len(s) != b // 4: + raise ValueError("signature length is wrong") + + if len(pk) != b // 8: + raise ValueError("public-key length is wrong") + + R = decodepoint(s[:b // 8]) + A = decodepoint(pk) + S = decodeint(s[b // 8:b // 4]) + h = Hint(encodepoint(R) + pk + m) + + (x1, y1, z1, t1) = P = scalarmult_B(S) + (x2, y2, z2, t2) = Q = edwards_add(R, scalarmult(A, h)) + + if (not isoncurve(P) or not isoncurve(Q) or + (x1*z2 - x2*z1) % q != 0 or (y1*z2 - y2*z1) % q != 0): + raise SignatureMismatch("signature does not pass verification") diff --git a/src/Crypt/CryptTor.py b/src/Crypt/CryptTor.py index 78ba6fc2..12866f49 100644 --- a/src/Crypt/CryptTor.py +++ b/src/Crypt/CryptTor.py @@ -52,6 +52,7 @@ def verify(data, publickey, sign): return valid def privatekeyToPublickey(privatekey): + from Crypt import CryptEd25519 import rsa from rsa import pkcs1 from lib import Ed25519 diff --git a/src/Debug/DebugHook.py b/src/Debug/DebugHook.py index 772d309c..c11185da 100644 --- a/src/Debug/DebugHook.py +++ b/src/Debug/DebugHook.py @@ -12,6 +12,8 @@ from . 
import Debug last_error = None thread_shutdown = None +thread_shutdown = None + def shutdownThread(): import main try: diff --git a/src/File/FileRequest.py b/src/File/FileRequest.py index 85bbcdce..8a16e591 100644 --- a/src/File/FileRequest.py +++ b/src/File/FileRequest.py @@ -165,7 +165,7 @@ class FileRequest(object): peer = site.addPeer(self.connection.ip, self.connection.port, return_peer=True, source="update") # Add or get peer # On complete publish to other peers diffs = params.get("diffs", {}) - site.onComplete.once(lambda: site.publish(inner_path=inner_path, diffs=diffs, limit=6), "publish_%s" % inner_path) + site.onComplete.once(lambda: site.publish(inner_path=inner_path, diffs=diffs), "publish_%s" % inner_path) # Load new content file and download changed files in new thread def downloader(): @@ -376,7 +376,7 @@ class FileRequest(object): for hash_id, peers in found.items(): for peer in peers: - ip_type = helper.getIpType(peer.ip) + ip_type = self.server.getIpType(peer.ip) if len(back[ip_type][hash_id]) < 20: back[ip_type][hash_id].append(peer.packMyAddress()) return back @@ -430,7 +430,7 @@ class FileRequest(object): # Check requested port of the other peer def actionCheckport(self, params): - if helper.getIpType(self.connection.ip) == "ipv6": + if self.server.getIpType(self.connection.ip) == "ipv6": sock_address = (self.connection.ip, params["port"], 0, 0) else: sock_address = (self.connection.ip, params["port"]) diff --git a/src/File/FileServer.py b/src/File/FileServer.py index 7f73017e..7114849b 100644 --- a/src/File/FileServer.py +++ b/src/File/FileServer.py @@ -3,6 +3,7 @@ import time import random import socket import sys +import weakref import gevent import gevent.pool @@ -18,6 +19,13 @@ from Connection import ConnectionServer from Plugin import PluginManager from Debug import Debug +log = logging.getLogger("FileServer") + +class FakeThread(object): + def __init__(self): + pass + def ready(self): + return False @PluginManager.acceptPlugins class FileServer(ConnectionServer): @@ -25,12 +33,31 @@ class FileServer(ConnectionServer): def __init__(self, ip=config.fileserver_ip, port=config.fileserver_port, ip_type=config.fileserver_ip_type): self.site_manager = SiteManager.site_manager self.portchecker = PeerPortchecker.PeerPortchecker(self) - self.log = logging.getLogger("FileServer") self.ip_type = ip_type self.ip_external_list = [] + # This is wrong: + # self.log = logging.getLogger("FileServer") + # The value of self.log will be overwritten in ConnectionServer.__init__() + + self.recheck_port = True + + self.active_mode_thread_pool = gevent.pool.Pool(None) + self.site_pool = gevent.pool.Pool(None) + + self.update_pool = gevent.pool.Pool(10) + self.update_start_time = 0 + self.update_sites_task_next_nr = 1 + + self.update_threads = weakref.WeakValueDictionary() + + self.passive_mode = None + self.active_mode = None + self.active_mode_threads = {} + + self.supported_ip_types = ["ipv4"] # Outgoing ip_type support - if helper.getIpType(ip) == "ipv6" or self.isIpv6Supported(): + if self.getIpType(ip) == "ipv6" or self.isIpv6Supported(): self.supported_ip_types.append("ipv6") if ip_type == "ipv6" or (ip_type == "dual" and "ipv6" in self.supported_ip_types): @@ -52,27 +79,50 @@ class FileServer(ConnectionServer): config.arguments.fileserver_port = port ConnectionServer.__init__(self, ip, port, self.handleRequest) - self.log.debug("Supported IP types: %s" % self.supported_ip_types) + log.debug("Supported IP types: %s" % self.supported_ip_types) + + 
self.managed_pools["active_mode_thread"] = self.active_mode_thread_pool + self.managed_pools["update"] = self.update_pool + self.managed_pools["site"] = self.site_pool if ip_type == "dual" and ip == "::": # Also bind to ipv4 addres in dual mode try: - self.log.debug("Binding proxy to %s:%s" % ("::", self.port)) + log.debug("Binding proxy to %s:%s" % ("::", self.port)) self.stream_server_proxy = StreamServer( ("0.0.0.0", self.port), self.handleIncomingConnection, spawn=self.pool, backlog=100 ) except Exception as err: - self.log.info("StreamServer proxy create error: %s" % Debug.formatException(err)) + log.info("StreamServer proxy create error: %s" % Debug.formatException(err)) self.port_opened = {} - self.sites = self.site_manager.sites self.last_request = time.time() self.files_parsing = {} self.ui_server = None + def getSites(self): + sites = self.site_manager.list() + # We need to keep self.sites for the backward compatibility with plugins. + # Never. Ever. Use it. + # TODO: fix plugins + self.sites = sites + return sites + + def getSite(self, address): + return self.getSites().get(address, None) + + def getSiteAddresses(self): + # Avoid saving the site list on the stack, since a site may be deleted + # from the original list while iterating. + # Use the list of addresses instead. + return [ + site.address for site in + sorted(list(self.getSites().values()), key=lambda site: site.settings.get("modified", 0), reverse=True) + ] + def getRandomPort(self, ip, port_range_from, port_range_to): - self.log.info("Getting random port in range %s-%s..." % (port_range_from, port_range_to)) + log.info("Getting random port in range %s-%s..." % (port_range_from, port_range_to)) tried = [] for bind_retry in range(100): port = random.randint(port_range_from, port_range_to) @@ -84,14 +134,14 @@ class FileServer(ConnectionServer): sock.bind((ip, port)) success = True except Exception as err: - self.log.warning("Error binding to port %s: %s" % (port, err)) + log.warning("Error binding to port %s: %s" % (port, err)) success = False sock.close() if success: - self.log.info("Found unused random port: %s" % port) + log.info("Found unused random port: %s" % port) return port else: - time.sleep(0.1) + self.sleep(0.1) return False def isIpv6Supported(self): @@ -104,16 +154,16 @@ class FileServer(ConnectionServer): sock.connect((ipv6_testip, 80)) local_ipv6 = sock.getsockname()[0] if local_ipv6 == "::1": - self.log.debug("IPv6 not supported, no local IPv6 address") + log.debug("IPv6 not supported, no local IPv6 address") return False else: - self.log.debug("IPv6 supported on IP %s" % local_ipv6) + log.debug("IPv6 supported on IP %s" % local_ipv6) return True except socket.error as err: - self.log.warning("IPv6 not supported: %s" % err) + log.warning("IPv6 not supported: %s" % err) return False except Exception as err: - self.log.error("IPv6 check error: %s" % err) + log.error("IPv6 check error: %s" % err) return False def listenProxy(self): @@ -121,29 +171,34 @@ class FileServer(ConnectionServer): self.stream_server_proxy.serve_forever() except Exception as err: if err.errno == 98: # Address already in use error - self.log.debug("StreamServer proxy listen error: %s" % err) + log.debug("StreamServer proxy listen error: %s" % err) else: - self.log.info("StreamServer proxy listen error: %s" % err) + log.info("StreamServer proxy listen error: %s" % err) # Handle request to fileserver def handleRequest(self, connection, message): if config.verbose: if "params" in message: - self.log.debug( + log.debug( "FileRequest: %s 
%s %s %s" % (str(connection), message["cmd"], message["params"].get("site"), message["params"].get("inner_path")) ) else: - self.log.debug("FileRequest: %s %s" % (str(connection), message["cmd"])) + log.debug("FileRequest: %s %s" % (str(connection), message["cmd"])) req = FileRequest(self, connection) req.route(message["cmd"], message.get("req_id"), message.get("params")) - if not self.has_internet and not connection.is_private_ip: - self.has_internet = True - self.onInternetOnline() + if not connection.is_private_ip: + self.setInternetStatus(True) def onInternetOnline(self): - self.log.info("Internet online") - gevent.spawn(self.checkSites, check_files=False, force_port_check=True) + log.info("Internet online") + invalid_interval=( + self.internet_offline_since - self.internet_outage_threshold - random.randint(60 * 5, 60 * 10), + time.time() + ) + self.invalidateUpdateTime(invalid_interval) + self.recheck_port = True + self.spawn(self.updateSites) # Reload the FileRequest class to prevent restarts in debug mode def reload(self): @@ -152,8 +207,8 @@ class FileServer(ConnectionServer): FileRequest = imp.load_source("FileRequest", "src/File/FileRequest.py").FileRequest def portCheck(self): - if config.offline: - self.log.info("Offline mode: port check disabled") + if self.isOfflineMode(): + log.info("Offline mode: port check disabled") res = {"ipv4": None, "ipv6": None} self.port_opened = res return res @@ -162,14 +217,14 @@ class FileServer(ConnectionServer): for ip_external in config.ip_external: SiteManager.peer_blacklist.append((ip_external, self.port)) # Add myself to peer blacklist - ip_external_types = set([helper.getIpType(ip) for ip in config.ip_external]) + ip_external_types = set([self.getIpType(ip) for ip in config.ip_external]) res = { "ipv4": "ipv4" in ip_external_types, "ipv6": "ipv6" in ip_external_types } self.ip_external_list = config.ip_external self.port_opened.update(res) - self.log.info("Server port opened based on configuration ipv4: %s, ipv6: %s" % (res["ipv4"], res["ipv6"])) + log.info("Server port opened based on configuration ipv4: %s, ipv6: %s" % (res["ipv4"], res["ipv6"])) return res self.port_opened = {} @@ -177,7 +232,7 @@ class FileServer(ConnectionServer): self.ui_server.updateWebsocket() if "ipv6" in self.supported_ip_types: - res_ipv6_thread = gevent.spawn(self.portchecker.portCheck, self.port, "ipv6") + res_ipv6_thread = self.spawn(self.portchecker.portCheck, self.port, "ipv6") else: res_ipv6_thread = None @@ -190,8 +245,8 @@ class FileServer(ConnectionServer): res_ipv6 = {"ip": None, "opened": None} else: res_ipv6 = res_ipv6_thread.get() - if res_ipv6["opened"] and not helper.getIpType(res_ipv6["ip"]) == "ipv6": - self.log.info("Invalid IPv6 address from port check: %s" % res_ipv6["ip"]) + if res_ipv6["opened"] and not self.getIpType(res_ipv6["ip"]) == "ipv6": + log.info("Invalid IPv6 address from port check: %s" % res_ipv6["ip"]) res_ipv6["opened"] = False self.ip_external_list = [] @@ -200,7 +255,7 @@ class FileServer(ConnectionServer): self.ip_external_list.append(res_ip["ip"]) SiteManager.peer_blacklist.append((res_ip["ip"], self.port)) - self.log.info("Server port opened ipv4: %s, ipv6: %s" % (res_ipv4["opened"], res_ipv6["opened"])) + log.info("Server port opened ipv4: %s, ipv6: %s" % (res_ipv4["opened"], res_ipv6["opened"])) res = {"ipv4": res_ipv4["opened"], "ipv6": res_ipv6["opened"]} @@ -211,9 +266,9 @@ class FileServer(ConnectionServer): for ip in interface_ips: if not helper.isPrivateIp(ip) and ip not in self.ip_external_list: 
self.ip_external_list.append(ip) - res[helper.getIpType(ip)] = True # We have opened port if we have external ip + res[self.getIpType(ip)] = True # We have opened port if we have external ip SiteManager.peer_blacklist.append((ip, self.port)) - self.log.debug("External ip found on interfaces: %s" % ip) + log.debug("External ip found on interfaces: %s" % ip) self.port_opened.update(res) @@ -222,131 +277,381 @@ class FileServer(ConnectionServer): return res - # Check site file integrity - def checkSite(self, site, check_files=False): - if site.isServing(): - site.announce(mode="startup") # Announce site to tracker - site.update(check_files=check_files) # Update site's content.json and download changed files - site.sendMyHashfield() - site.updateHashfield() - - # Check sites integrity - @util.Noparallel() - def checkSites(self, check_files=False, force_port_check=False): - self.log.debug("Checking sites...") - s = time.time() - sites_checking = False - if not self.port_opened or force_port_check: # Test and open port if not tested yet - if len(self.sites) <= 2: # Don't wait port opening on first startup - sites_checking = True - for address, site in list(self.sites.items()): - gevent.spawn(self.checkSite, site, check_files) - + @util.Noparallel(queue=True) + def recheckPort(self): + if self.recheck_port: self.portCheck() + self.recheck_port = False - if not self.port_opened["ipv4"]: - self.tor_manager.startOnions() + # Returns False if Internet is immediately available + # Returns True if we've spent some time waiting for Internet + # Returns None if FileServer is stopping or the Offline mode is enabled + @util.Noparallel() + def waitForInternetOnline(self): + if self.isOfflineMode() or self.stopping: + return None - if not sites_checking: - check_pool = gevent.pool.Pool(5) - # Check sites integrity - for site in sorted(list(self.sites.values()), key=lambda site: site.settings.get("modified", 0), reverse=True): - if not site.isServing(): - continue - check_thread = check_pool.spawn(self.checkSite, site, check_files) # Check in new thread - time.sleep(2) - if site.settings.get("modified", 0) < time.time() - 60 * 60 * 24: # Not so active site, wait some sec to finish - check_thread.join(timeout=5) - self.log.debug("Checksites done in %.3fs" % (time.time() - s)) + if self.isInternetOnline(): + return False + + while not self.isInternetOnline(): + self.sleep(30) + if self.isOfflineMode() or self.stopping: + return None + if self.isInternetOnline(): + break + if len(self.update_pool) == 0: + log.info("Internet connection seems to be broken. 
Running an update for a random site to check if we are able to connect to any peer.") + thread = self.thread_pool.spawn(self.updateRandomSite) + thread.join() + + self.recheckPort() + return True + + def updateRandomSite(self, site_addresses=None, force=False): + if not site_addresses: + site_addresses = self.getSiteAddresses() + + site_addresses = random.sample(site_addresses, 1) + if len(site_addresses) < 1: + return + + address = site_addresses[0] + site = self.getSite(address) + + if not site: + return + + log.info("Randomly chosen site: %s", site.address_short) + + self.spawnUpdateSite(site).join() + + def updateSite(self, site, check_files=False, verify_files=False): + if not site: + return + if verify_files: + mode = 'verify' + elif check_files: + mode = 'check' + else: + mode = 'update' + log.info("running <%s> for %s" % (mode, site.address_short)) + site.update2(check_files=check_files, verify_files=verify_files) + + def spawnUpdateSite(self, site, check_files=False, verify_files=False): + fake_thread = FakeThread() + self.update_threads[site.address] = fake_thread + thread = self.update_pool.spawn(self.updateSite, site, + check_files=check_files, verify_files=verify_files) + self.update_threads[site.address] = thread + return thread + + def lookupInUpdatePool(self, site_address): + thread = self.update_threads.get(site_address, None) + if not thread or thread.ready(): + return None + return thread + + def siteIsInUpdatePool(self, site_address): + return self.lookupInUpdatePool(site_address) is not None + + def invalidateUpdateTime(self, invalid_interval): + for address in self.getSiteAddresses(): + site = self.getSite(address) + if site: + site.invalidateUpdateTime(invalid_interval) + + def isSiteUpdateTimeValid(self, site_address): + site = self.getSite(site_address) + if not site: + return False + return site.isUpdateTimeValid() + + def updateSites(self): + task_nr = self.update_sites_task_next_nr + self.update_sites_task_next_nr += 1 + + task_description = "updateSites [#%d]" % task_nr + log.info("%s: started", task_description) + + # Don't wait port opening on first startup. Do the instant check now. 
+ if len(self.getSites()) <= 2: + for address, site in list(self.getSites().items()): + self.updateSite(site, check_files=True) + + self.recheckPort() + + all_site_addresses = self.getSiteAddresses() + site_addresses = [ + address for address in all_site_addresses + if not self.isSiteUpdateTimeValid(address) + ] + + log.info("%s: chosen %d sites (of %d)", task_description, len(site_addresses), len(all_site_addresses)) + + sites_processed = 0 + sites_skipped = 0 + start_time = time.time() + self.update_start_time = start_time + progress_print_time = time.time() + + # Check sites integrity + for site_address in site_addresses: + site = None + self.sleep(1) + self.waitForInternetOnline() + + while self.isActiveMode() and self.shouldThrottleNewConnections(): + self.sleep(1) + + if not self.isActiveMode(): + break + + site = self.getSite(site_address) + if not site or site.isUpdateTimeValid() or self.siteIsInUpdatePool(site_address): + sites_skipped += 1 + continue + + sites_processed += 1 + thread = self.spawnUpdateSite(site) + + if not self.isActiveMode(): + break + + if time.time() - progress_print_time > 60: + progress_print_time = time.time() + time_spent = time.time() - start_time + time_per_site = time_spent / float(sites_processed) + sites_left = len(site_addresses) - sites_processed + time_left = time_per_site * sites_left + log.info("%s: DONE: %d sites in %.2fs (%.2fs per site); SKIPPED: %d sites; LEFT: %d sites in %.2fs", + task_description, + sites_processed, + time_spent, + time_per_site, + sites_skipped, + sites_left, + time_left + ) + + if not self.isActiveMode(): + log.info("%s: stopped", task_description) + else: + log.info("%s: finished in %.2fs", task_description, time.time() - start_time) + + def peekSiteForVerification(self): + check_files_interval = 60 * 60 * 24 + verify_files_interval = 60 * 60 * 24 * 10 + site_addresses = self.getSiteAddresses() + random.shuffle(site_addresses) + for site_address in site_addresses: + site = self.getSite(site_address) + if not site: + continue + mode = site.isFileVerificationExpired(check_files_interval, verify_files_interval) + if mode: + return (site_address, mode) + return (None, None) + + + def sitesVerificationThread(self): + log.info("sitesVerificationThread started") + short_timeout = 20 + long_timeout = 120 + + self.sleep(long_timeout) + + while self.isActiveMode(): + site = None + self.sleep(short_timeout) + self.waitForInternetOnline() + + while self.isActiveMode() and self.shouldThrottleNewConnections(): + self.sleep(1) + + if not self.isActiveMode(): + break + + site_address, mode = self.peekSiteForVerification() + if not site_address: + self.sleep(long_timeout) + continue + + while self.siteIsInUpdatePool(site_address) and self.isActiveMode(): + self.sleep(1) + + if not self.isActiveMode(): + break + + site = self.getSite(site_address) + if not site: + continue + + if mode == "verify": + check_files = False + verify_files = True + elif mode == "check": + check_files = True + verify_files = False + else: + continue + + thread = self.spawnUpdateSite(site, + check_files=check_files, verify_files=verify_files) + + log.info("sitesVerificationThread stopped") + + def sitesMaintenanceThread(self, mode="full"): + log.info("sitesMaintenanceThread(%s) started" % mode) - def cleanupSites(self): - import gc startup = True - time.sleep(5 * 60) # Sites already cleaned up on startup - peers_protected = set([]) - while 1: - # Sites health care every 20 min - self.log.debug( - "Running site cleanup, connections: %s, internet: %s, protected 
peers: %s" % - (len(self.connections), self.has_internet, len(peers_protected)) + + short_timeout = 2 + min_long_timeout = 10 + max_long_timeout = 60 * 10 + long_timeout = min_long_timeout + short_cycle_time_limit = 60 * 2 + + while self.isActiveMode(): + self.sleep(long_timeout) + + while self.isActiveMode() and self.shouldThrottleNewConnections(): + self.sleep(1) + + if not self.isActiveMode(): + break + + start_time = time.time() + + log.debug( + "Starting <%s> maintenance cycle: connections=%s, internet=%s", + mode, + len(self.connections), self.isInternetOnline() + ) + start_time = time.time() + + site_addresses = self.getSiteAddresses() + + sites_processed = 0 + + for site_address in site_addresses: + if not self.isActiveMode(): + break + + site = self.getSite(site_address) + if not site: + continue + + log.debug("Running maintenance for site: %s", site.address_short) + + done = site.runPeriodicMaintenance(startup=startup) + site = None + if done: + sites_processed += 1 + self.sleep(short_timeout) + + # If we host hundreds of sites, the full maintenance cycle may take very + # long time, especially on startup ( > 1 hour). + # This means we are not able to run the maintenance procedure for active + # sites frequently enough using just a single maintenance thread. + # So we run 2 maintenance threads: + # * One running full cycles. + # * And one running short cycles for the most active sites. + # When the short cycle runs out of the time limit, it restarts + # from the beginning of the site list. + if mode == "short" and time.time() - start_time > short_cycle_time_limit: + break + + log.debug("<%s> maintenance cycle finished in %.2fs. Total sites: %d. Processed sites: %d. Timeout: %d", + mode, + time.time() - start_time, + len(site_addresses), + sites_processed, + long_timeout ) - for address, site in list(self.sites.items()): - if not site.isServing(): - continue + if sites_processed: + long_timeout = max(int(long_timeout / 2), min_long_timeout) + else: + long_timeout = min(long_timeout + 1, max_long_timeout) - if not startup: - site.cleanupPeers(peers_protected) - - time.sleep(1) # Prevent too quick request - - peers_protected = set([]) - for address, site in list(self.sites.items()): - if not site.isServing(): - continue - - if site.peers: - with gevent.Timeout(10, exception=False): - site.announcer.announcePex() - - # Last check modification failed - if site.content_updated is False: - site.update() - elif site.bad_files: - site.retryBadFiles() - - if time.time() - site.settings.get("modified", 0) < 60 * 60 * 24 * 7: - # Keep active connections if site has been modified witin 7 days - connected_num = site.needConnections(check_site_on_reconnect=True) - - if connected_num < config.connected_limit: # This site has small amount of peers, protect them from closing - peers_protected.update([peer.key for peer in site.getConnectedPeers()]) - - time.sleep(1) # Prevent too quick request - - site = None - gc.collect() # Implicit garbage collection + site_addresses = None startup = False - time.sleep(60 * 20) + log.info("sitesMaintenanceThread(%s) stopped" % mode) - def announceSite(self, site): - site.announce(mode="update", pex=False) - active_site = time.time() - site.settings.get("modified", 0) < 24 * 60 * 60 - if site.settings["own"] or active_site: - # Check connections more frequently on own and active sites to speed-up first connections - site.needConnections(check_site_on_reconnect=True) - site.sendMyHashfield(3) - site.updateHashfield(3) + def keepAliveThread(self): + # This thread 
is mostly useless on a system under load, since it never does + # any works, if we have active traffic. + # + # We should initiate some network activity to detect the Internet outage + # and avoid false positives. We normally have some network activity + # initiated by various parts on the application as well as network peers. + # So it's not a problem. + # + # However, if it actually happens that we have no network traffic for + # some time (say, we host just a couple of inactive sites, and no peers + # are interested in connecting to them), we initiate some traffic by + # performing the update for a random site. It's way better than just + # silly pinging a random peer for no profit. + log.info("keepAliveThread started") + while self.isActiveMode(): + self.waitForInternetOnline() - # Announce sites every 20 min - def announceSites(self): - time.sleep(5 * 60) # Sites already announced on startup - while 1: + threshold = self.internet_outage_threshold / 2.0 + + self.sleep(threshold / 2.0) + + while self.isActiveMode() and self.shouldThrottleNewConnections(): + self.sleep(1) + + if not self.isActiveMode(): + break + + last_activity_time = max( + self.last_successful_internet_activity_time, + self.last_outgoing_internet_activity_time) + now = time.time() + if not len(self.getSites()): + continue + if last_activity_time > now - threshold: + continue + if len(self.update_pool) != 0: + continue + + log.info("No network activity for %.2fs. Running an update for a random site.", + now - last_activity_time + ) + self.update_pool.spawn(self.updateRandomSite, force=True) + log.info("keepAliveThread stopped") + + # Periodic reloading of tracker files + def reloadTrackerFilesThread(self): + # TODO: + # This should probably be more sophisticated. + # We should check if the files have actually changed, + # and do it more often. + log.info("reloadTrackerFilesThread started") + interval = 60 * 10 + while self.isActiveMode(): + self.sleep(interval) + if not self.isActiveMode(): + break config.loadTrackersFile() - s = time.time() - for address, site in list(self.sites.items()): - if not site.isServing(): - continue - gevent.spawn(self.announceSite, site).join(timeout=10) - time.sleep(1) - taken = time.time() - s - - # Query all trackers one-by-one in 20 minutes evenly distributed - sleep = max(0, 60 * 20 / len(config.trackers) - taken) - - self.log.debug("Site announce tracker done in %.3fs, sleeping for %.3fs..." % (taken, sleep)) - time.sleep(sleep) + log.info("reloadTrackerFilesThread stopped") # Detects if computer back from wakeup - def wakeupWatcher(self): + def wakeupWatcherThread(self): + log.info("wakeupWatcherThread started") last_time = time.time() last_my_ips = socket.gethostbyname_ex('')[2] - while 1: - time.sleep(30) + while self.isActiveMode(): + self.sleep(30) + if not self.isActiveMode(): + break is_time_changed = time.time() - max(self.last_request, last_time) > 60 * 3 if is_time_changed: # If taken more than 3 minute then the computer was in sleep mode - self.log.info( + log.info( "Wakeup detected: time warp from %0.f to %0.f (%0.f sleep seconds), acting like startup..." 
% (last_time, time.time(), time.time() - last_time) ) @@ -354,50 +659,130 @@ class FileServer(ConnectionServer): my_ips = socket.gethostbyname_ex('')[2] is_ip_changed = my_ips != last_my_ips if is_ip_changed: - self.log.info("IP change detected from %s to %s" % (last_my_ips, my_ips)) + log.info("IP change detected from %s to %s" % (last_my_ips, my_ips)) if is_time_changed or is_ip_changed: - self.checkSites(check_files=False, force_port_check=True) + invalid_interval=( + last_time - self.internet_outage_threshold - random.randint(60 * 5, 60 * 10), + time.time() + ) + self.invalidateUpdateTime(invalid_interval) + self.recheck_port = True + self.spawn(self.updateSites) last_time = time.time() last_my_ips = my_ips + log.info("wakeupWatcherThread stopped") + + def setOfflineMode(self, offline_mode): + ConnectionServer.setOfflineMode(self, offline_mode) + self.setupActiveMode() + + def setPassiveMode(self, passive_mode): + if self.passive_mode == passive_mode: + return + self.passive_mode = passive_mode + if self.passive_mode: + log.info("passive mode is ON"); + else: + log.info("passive mode is OFF"); + self.setupActiveMode() + + def isPassiveMode(self): + return self.passive_mode + + def setupActiveMode(self): + active_mode = (not self.passive_mode) and (not self.isOfflineMode()) + if self.active_mode == active_mode: + return + self.active_mode = active_mode + if self.active_mode: + log.info("active mode is ON"); + self.enterActiveMode(); + else: + log.info("active mode is OFF"); + self.leaveActiveMode(); + + def killActiveModeThreads(self): + for key, thread in list(self.active_mode_threads.items()): + if thread: + if not thread.ready(): + log.info("killing %s" % key) + gevent.kill(thread) + del self.active_mode_threads[key] + + def leaveActiveMode(self): + pass + + def enterActiveMode(self): + self.killActiveModeThreads() + x = self.active_mode_threads + p = self.active_mode_thread_pool + x["thread_keep_alive"] = p.spawn(self.keepAliveThread) + x["thread_wakeup_watcher"] = p.spawn(self.wakeupWatcherThread) + x["thread_sites_verification"] = p.spawn(self.sitesVerificationThread) + x["thread_reload_tracker_files"] = p.spawn(self.reloadTrackerFilesThread) + x["thread_sites_maintenance_full"] = p.spawn(self.sitesMaintenanceThread, mode="full") + x["thread_sites_maintenance_short"] = p.spawn(self.sitesMaintenanceThread, mode="short") + x["thread_initial_site_updater"] = p.spawn(self.updateSites) + + # Returns True, if an active mode thread should keep going, + # i.e active mode is enabled and the server not going to shutdown + def isActiveMode(self): + self.setupActiveMode() + if not self.active_mode: + return False + if not self.running: + return False + if self.stopping: + return False + return True # Bind and start serving sites - def start(self, check_sites=True): + # If passive_mode is False, FileServer starts the full-featured file serving: + # * Checks for updates at startup. + # * Checks site's integrity. + # * Runs periodic update checks. + # * Watches for internet being up or down and for computer to wake up and runs update checks. + # If passive_mode is True, all the mentioned activity is disabled. 
+ def start(self, passive_mode=False, check_sites=None, check_connections=True): + + # Backward compatibility for a misnamed argument: + if check_sites is not None: + passive_mode = not check_sites + if self.stopping: return False - ConnectionServer.start(self) + ConnectionServer.start(self, check_connections=check_connections) try: self.stream_server.start() except Exception as err: - self.log.error("Error listening on: %s:%s: %s" % (self.ip, self.port, err)) + log.error("Error listening on: %s:%s: %s" % (self.ip, self.port, err)) - self.sites = self.site_manager.list() if config.debug: # Auto reload FileRequest on change from Debug import DebugReloader DebugReloader.watcher.addCallback(self.reload) - if check_sites: # Open port, Update sites, Check files integrity - gevent.spawn(self.checkSites) + # XXX: for initializing self.sites + # Remove this line when self.sites gets completely unused + self.getSites() - thread_announce_sites = gevent.spawn(self.announceSites) - thread_cleanup_sites = gevent.spawn(self.cleanupSites) - thread_wakeup_watcher = gevent.spawn(self.wakeupWatcher) + self.setPassiveMode(passive_mode) ConnectionServer.listen(self) - self.log.debug("Stopped.") + log.info("Stopped.") - def stop(self): + def stop(self, ui_websocket=None): if self.running and self.portchecker.upnp_port_opened: - self.log.debug('Closing port %d' % self.port) + log.debug('Closing port %d' % self.port) try: self.portchecker.portClose(self.port) - self.log.info('Closed port via upnp.') + log.info('Closed port via upnp.') except Exception as err: - self.log.info("Failed at attempt to use upnp to close port: %s" % err) + log.info("Failed at attempt to use upnp to close port: %s" % err) - return ConnectionServer.stop(self) + return ConnectionServer.stop(self, ui_websocket=ui_websocket) diff --git a/src/Peer/Peer.py b/src/Peer/Peer.py index 03cc1f47..43c2932f 100644 --- a/src/Peer/Peer.py +++ b/src/Peer/Peer.py @@ -20,51 +20,135 @@ if config.use_tempfiles: # Communicate remote peers @PluginManager.acceptPlugins class Peer(object): - __slots__ = ( - "ip", "port", "site", "key", "connection", "connection_server", "time_found", "time_response", "time_hashfield", - "time_added", "has_hashfield", "is_tracker_connection", "time_my_hashfield_sent", "last_ping", "reputation", - "last_content_json_update", "hashfield", "connection_error", "hash_failed", "download_bytes", "download_time" - ) - def __init__(self, ip, port, site=None, connection_server=None): self.ip = ip self.port = port self.site = site self.key = "%s:%s" % (ip, port) + self.ip_type = None + + self.removed = False + + self.log_level = logging.DEBUG + self.connection_error_log_level = logging.DEBUG + self.connection = None self.connection_server = connection_server self.has_hashfield = False # Lazy hashfield object not created yet self.time_hashfield = None # Last time peer's hashfiled downloaded self.time_my_hashfield_sent = None # Last time my hashfield sent to peer self.time_found = time.time() # Time of last found in the torrent tracker - self.time_response = None # Time of last successful response from peer + self.time_response = 0 # Time of last successful response from peer self.time_added = time.time() self.last_ping = None # Last response time for ping + self.last_pex = 0 # Last query/response time for pex self.is_tracker_connection = False # Tracker connection instead of normal peer self.reputation = 0 # More likely to connect if larger self.last_content_json_update = 0.0 # Modify date of last received content.json + self.protected = 0 
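+        # reachable stays None until it is computed lazily by updateCachedState()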
+ self.reachable = None self.connection_error = 0 # Series of connection error self.hash_failed = 0 # Number of bad files from peer self.download_bytes = 0 # Bytes downloaded self.download_time = 0 # Time spent to download + self.protectedRequests = ["getFile", "streamFile", "update", "listModified"] + def __getattr__(self, key): if key == "hashfield": self.has_hashfield = True self.hashfield = PeerHashfield() return self.hashfield else: - return getattr(self, key) + # Raise appropriately formatted attribute error + return object.__getattribute__(self, key) + + def log(self, text, log_level = None): + if log_level is None: + log_level = self.log_level + if log_level <= logging.DEBUG: + if not config.verbose: + return # Only log if we are in debug mode + + logger = None - def log(self, text): - if not config.verbose: - return # Only log if we are in debug mode if self.site: - self.site.log.debug("%s:%s %s" % (self.ip, self.port, text)) + logger = self.site.log else: - logging.debug("%s:%s %s" % (self.ip, self.port, text)) + logger = logging.getLogger() + + logger.log(log_level, "%s:%s %s" % (self.ip, self.port, text)) + + # Protect connection from being closed by site.cleanupPeers() + def markProtected(self, interval=60*2): + self.protected = max(self.protected, time.time() + interval) + + def isProtected(self): + if self.protected > 0: + if self.protected < time.time(): + self.protected = 0 + return self.protected > 0 + + def isTtlExpired(self, ttl): + last_activity = max(self.time_found, self.time_response) + return (time.time() - last_activity) > ttl + + # Since 0.8.0 + def isConnected(self): + if self.connection and not self.connection.connected: + self.connection = None + return self.connection and self.connection.connected + + # Peer proved to to be connectable recently + # Since 0.8.0 + def isConnectable(self): + if self.connection_error >= 1: # The last connection attempt failed + return False + if time.time() - self.time_response > 60 * 60 * 2: # Last successful response more than 2 hours ago + return False + return self.isReachable() + + # Since 0.8.0 + def isReachable(self): + if self.reachable is None: + self.updateCachedState() + return self.reachable + + # Since 0.8.0 + def getIpType(self): + if not self.ip_type: + self.updateCachedState() + return self.ip_type + + # We cache some ConnectionServer-related state for better performance. + # This kind of state currently doesn't change during a program session, + # and it's safe to read and cache it just once. But future versions + # may bring more pieces of dynamic configuration. So we update the state + # on each peer.found(). + def updateCachedState(self): + connection_server = self.getConnectionServer() + if not self.port or self.port == 1: # Port 1 considered as "no open port" + self.reachable = False + else: + self.reachable = connection_server.isIpReachable(self.ip) + self.ip_type = connection_server.getIpType(self.ip) + + + # FIXME: + # This should probably be changed. + # When creating a peer object, the caller must provide either `connection_server`, + # or `site`, so Peer object is able to use `site.connection_server`. 
+ def getConnectionServer(self): + if self.connection_server: + connection_server = self.connection_server + elif self.site: + connection_server = self.site.connection_server + else: + import main + connection_server = main.file_server + return connection_server # Connect to host def connect(self, connection=None): @@ -87,29 +171,30 @@ class Peer(object): self.connection = None try: - if self.connection_server: - connection_server = self.connection_server - elif self.site: - connection_server = self.site.connection_server - else: - import main - connection_server = main.file_server + connection_server = self.getConnectionServer() self.connection = connection_server.getConnection(self.ip, self.port, site=self.site, is_tracker_connection=self.is_tracker_connection) - self.reputation += 1 - self.connection.sites += 1 + if self.connection and self.connection.connected: + self.reputation += 1 + self.connection.sites += 1 except Exception as err: self.onConnectionError("Getting connection error") self.log("Getting connection error: %s (connection_error: %s, hash_failed: %s)" % - (Debug.formatException(err), self.connection_error, self.hash_failed)) + (Debug.formatException(err), self.connection_error, self.hash_failed), + log_level=self.connection_error_log_level) self.connection = None return self.connection + def disconnect(self, reason="Unknown"): + if self.connection: + self.connection.close(reason) + self.connection = None + # Check if we have connection to peer def findConnection(self): if self.connection and self.connection.connected: # We have connection to peer return self.connection else: # Try to find from other sites connections - self.connection = self.site.connection_server.getConnection(self.ip, self.port, create=False, site=self.site) + self.connection = self.getConnectionServer().getConnection(self.ip, self.port, create=False, site=self.site) if self.connection: self.connection.sites += 1 return self.connection @@ -143,9 +228,13 @@ class Peer(object): if source in ("tracker", "local"): self.site.peers_recent.appendleft(self) self.time_found = time.time() + self.updateCachedState() # Send a command to peer and return response value def request(self, cmd, params={}, stream_to=None): + if self.removed: + return False + if not self.connection or self.connection.closed: self.connect() if not self.connection: @@ -156,6 +245,8 @@ class Peer(object): for retry in range(1, 4): # Retry 3 times try: + if cmd in self.protectedRequests: + self.markProtected() if not self.connection: raise Exception("No connection found") res = self.connection.request(cmd, params, stream_to) @@ -188,6 +279,9 @@ class Peer(object): # Get a file content from peer def getFile(self, site, inner_path, file_size=None, pos_from=0, pos_to=None, streaming=False): + if self.removed: + return False + if file_size and file_size > 5 * 1024 * 1024: max_read_size = 1024 * 1024 else: @@ -241,11 +335,14 @@ class Peer(object): return buff # Send a ping request - def ping(self): + def ping(self, timeout=10.0, tryes=3): + if self.removed: + return False + response_time = None - for retry in range(1, 3): # Retry 3 times + for retry in range(1, tryes): # Retry 3 times s = time.time() - with gevent.Timeout(10.0, False): # 10 sec timeout, don't raise exception + with gevent.Timeout(timeout, False): res = self.request("ping") if res and "body" in res and res["body"] == b"Pong!": @@ -264,10 +361,18 @@ class Peer(object): return response_time # Request peer exchange from peer - def pex(self, site=None, need_num=5): + def pex(self, 
site=None, need_num=5, request_interval=60*2): + if self.removed: + return False + if not site: site = self.site # If no site defined request peers for this site + if self.last_pex + request_interval >= time.time(): + return False + + self.last_pex = time.time() + # give back 5 connectible peers packed_peers = helper.packPeers(self.site.getConnectablePeers(5, allow_private=False)) request = {"site": site.address, "peers": packed_peers["ipv4"], "need": need_num} @@ -276,6 +381,7 @@ class Peer(object): if packed_peers["ipv6"]: request["peers_ipv6"] = packed_peers["ipv6"] res = self.request("pex", request) + self.last_pex = time.time() if not res or "error" in res: return False added = 0 @@ -307,9 +413,14 @@ class Peer(object): # List modified files since the date # Return: {inner_path: modification date,...} def listModified(self, since): + if self.removed: + return False return self.request("listModified", {"since": since, "site": self.site.address}) def updateHashfield(self, force=False): + if self.removed: + return False + # Don't update hashfield again in 5 min if self.time_hashfield and time.time() - self.time_hashfield < 5 * 60 and not force: return False @@ -325,6 +436,9 @@ class Peer(object): # Find peers for hashids # Return: {hash1: ["ip:port", "ip:port",...],...} def findHashIds(self, hash_ids): + if self.removed: + return False + res = self.request("findHashIds", {"site": self.site.address, "hash_ids": hash_ids}) if not res or "error" in res or type(res) is not dict: return False @@ -368,6 +482,9 @@ class Peer(object): return True def publish(self, address, inner_path, body, modified, diffs=[]): + if self.removed: + return False + if len(body) > 10 * 1024 and self.connection and self.connection.handshake.get("rev", 0) >= 4095: # To save bw we don't push big content.json to peers body = b"" @@ -382,20 +499,22 @@ class Peer(object): # Stop and remove from site def remove(self, reason="Removing"): - self.log("Removing peer...Connection error: %s, Hash failed: %s" % (self.connection_error, self.hash_failed)) - if self.site and self.key in self.site.peers: - del(self.site.peers[self.key]) + self.removed = True + self.log("Removing peer with reason: <%s>. Connection error: %s, Hash failed: %s" % (reason, self.connection_error, self.hash_failed)) + if self.site: + self.site.deregisterPeer(self) + # No way: self.site = None + # We don't assign None to self.site here because it leads to random exceptions in various threads, + # that hold references to the peer and still believe it belongs to the site. - if self.site and self in self.site.peers_recent: - self.site.peers_recent.remove(self) - - if self.connection: - self.connection.close(reason) + self.disconnect(reason) # - EVENTS - # On connection error def onConnectionError(self, reason="Unknown"): + if not self.getConnectionServer().isInternetOnline(): + return self.connection_error += 1 if self.site and len(self.site.peers) > 200: limit = 3 @@ -403,7 +522,7 @@ class Peer(object): limit = 6 self.reputation -= 1 if self.connection_error >= limit: # Dead peer - self.remove("Peer connection: %s" % reason) + self.remove("Connection error limit reached: %s. 
Provided message: %s" % (limit, reason)) # Done working with peer def onWorkerDone(self): diff --git a/src/Site/Site.py b/src/Site/Site.py index 354fe9c0..46e19169 100644 --- a/src/Site/Site.py +++ b/src/Site/Site.py @@ -6,11 +6,13 @@ import time import random import sys import hashlib +import itertools import collections import base64 import gevent import gevent.pool +import gevent.lock import util from Config import config @@ -27,6 +29,125 @@ from Plugin import PluginManager from File import FileServer from .SiteAnnouncer import SiteAnnouncer from . import SiteManager +from . import SiteHelpers + +def lerp(val_min, val_max, scale): + return scale * (val_max - val_min) + val_min + +class ScaledTimeoutHandler: + def __init__(self, val_min, val_max, handler=None, scaler=None): + self.val_min = val_min + self.val_max = val_max + self.timestamp = 0 + self.handler = handler + self.scaler = scaler + self.log = logging.getLogger("ScaledTimeoutHandler") + + def isExpired(self, scale): + interval = lerp(self.val_min, self.val_max, scale) + expired_at = self.timestamp + interval + now = time.time() + expired = (now > expired_at) + if expired: + self.log.debug( + "Expired: [%d..%d]: scale=%f, interval=%f", + self.val_min, self.val_max, scale, interval) + return expired + + def done(self): + self.timestamp = time.time() + + def run(self, *args, **kwargs): + do_run = kwargs["force"] or self.isExpired(self.scaler()) + if do_run: + result = self.handler(*args, **kwargs) + if result: + self.done() + return result + else: + return None + +class BackgroundPublisher: + def __init__(self, site, published=[], limit=5, inner_path="content.json", diffs={}): + self.site = site + self.threads = gevent.pool.Pool(None) + self.inner_path = inner_path + self.stages = [ + { + "interval": ScaledTimeoutHandler(60, 60), + "max_tries": 2, + "tries": 0, + "limit": 0, + "done": False + }, + { + "interval": ScaledTimeoutHandler(60 * 10, 60 * 10), + "max_tries": 5, + "tries": 0, + "limit": 0, + "done": False + } + ] + self.reinit(published=published, limit=limit, diffs=diffs) + + def reinit(self, published=[], limit=5, diffs={}): + self.threads.kill() + self.published = published + self.diffs = diffs + + i = 0 + for stage in self.stages: + stage["nr"] = i + stage["limit"] = limit * (2 + i) + stage["tries"] = 0 + stage["done"] = False + stage["thread"] = None + if i > 0: + stage["interval"].done() + i += 1 + + def isStageComplete(self, stage): + if not stage["done"]: + stage["done"] = len(self.published) >= stage["limit"] + if not stage["done"]: + stage["done"] = stage["tries"] >= stage["max_tries"] + return stage["done"] + + def isComplete(self): + for stage in self.stages: + if not self.isStageComplete(stage): + return False + return True + + def process(self): + for stage in self.stages: + if not self.isStageComplete(stage): + self.processStage(stage) + break + return self.isComplete() + + def processStage(self, stage): + if not stage["interval"].isExpired(0): + return + + if len(self.site.peers) < stage["limit"]: + self.site.announce(mode="more") + + if not stage["thread"]: + peers = list(self.site.peers.values()) + random.shuffle(peers) + stage["thread"] = self.threads.spawn(self.site.publisher, + self.inner_path, peers, self.published, stage["limit"], diffs=self.diffs, max_retries=1) + + stage["tries"] += 1 + stage["interval"].done() + + self.site.log.info("Background publisher: Stage #%s: %s published to %s/%s peers", + stage["nr"], self.inner_path, len(self.published), stage["limit"]) + + def finalize(self): + 
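+        # Stop any publisher threads that are still running and log how many peers were reached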
self.threads.kill() + self.site.log.info("Background publisher: Published %s to %s peers", self.inner_path, len(self.published)) @PluginManager.acceptPlugins @@ -40,23 +161,35 @@ class Site(object): self.log = logging.getLogger("Site:%s" % self.address_short) self.addEventListeners() - self.content = None # Load content.json - self.peers = {} # Key: ip:port, Value: Peer.Peer - self.peers_recent = collections.deque(maxlen=150) - self.peer_blacklist = SiteManager.peer_blacklist # Ignore this peers (eg. myself) - self.greenlet_manager = GreenletManager.GreenletManager() # Running greenlets - self.worker_manager = WorkerManager(self) # Handle site download from other peers - self.bad_files = {} # SHA check failed files, need to redownload {"inner.content": 1} (key: file, value: failed accept) - self.content_updated = None # Content.js update time - self.notifications = [] # Pending notifications displayed once on page load [error|ok|info, message, timeout] - self.page_requested = False # Page viewed in browser - self.websockets = [] # Active site websocket connections + self.periodic_maintenance_handlers = [ + ScaledTimeoutHandler( + config.site_announce_interval_max * 60, + config.site_announce_interval_min * 60, + handler=self.periodicMaintenanceHandler_announce, + scaler=self.getAnnounceRating), + ScaledTimeoutHandler( + config.site_peer_check_interval_max * 60, + config.site_peer_check_interval_min * 60, + handler=self.periodicMaintenanceHandler_peer_check, + scaler=self.getAnnounceRating), + ScaledTimeoutHandler( + config.site_update_check_interval_max * 60, + config.site_update_check_interval_min * 60, + handler=self.periodicMaintenanceHandler_general, + scaler=self.getActivityRating) + ] + self.background_publishers = {} + self.background_publishers_lock = gevent.lock.RLock() + + # FZS = forced zero "since" + self.fzs_range = 20 + self.fzs_interval = 30 * 60 + self.fzs_count = random.randint(0, self.fzs_range / 4) + self.fzs_timestamp = 0 + + ############################################## self.connection_server = None - self.loadSettings(settings) # Load settings from sites.json - self.storage = SiteStorage(self, allow_create=allow_create) # Save and load site files - self.content_manager = ContentManager(self) - self.content_manager.loadContents() # Load content.json files if "main" in sys.modules: # import main has side-effects, breaks tests import main if "file_server" in dir(main): # Use global file server by default if possible @@ -66,9 +199,32 @@ class Site(object): self.connection_server = main.file_server else: self.connection_server = FileServer() + ############################################## + + self.content = None # Load content.json + self.peers = {} # Key: ip:port, Value: Peer.Peer + self.peers_recent = collections.deque(maxlen=150) + self.peer_blacklist = SiteManager.peer_blacklist # Ignore this peers (eg. 
myself) + self.greenlet_manager = GreenletManager.GreenletManager(self.connection_server.site_pool) # Running greenlets + self.worker_manager = WorkerManager(self) # Handle site download from other peers + self.bad_files = {} # SHA check failed files, need to redownload {"inner.content": 1} (key: file, value: failed accept) + self.content_updated = None # Content.js update time + self.last_online_update = 0 + self.startup_announce_done = 0 + self.notifications = [] # Pending notifications displayed once on page load [error|ok|info, message, timeout] + self.page_requested = False # Page viewed in browser + self.websockets = [] # Active site websocket connections + + self.loadSettings(settings) # Load settings from sites.json + self.storage = SiteStorage(self, allow_create=allow_create) # Save and load site files + self.content_manager = ContentManager(self) + self.content_manager.loadContents() # Load content.json files self.announcer = SiteAnnouncer(self) # Announce and get peer list from other nodes + self.peer_connector = SiteHelpers.PeerConnector(self) # Connect more peers in background by request + self.persistent_peer_req = None # The persistent peer requirement, managed by maintenance handler + if not self.settings.get("wrapper_key"): # To auth websocket permissions self.settings["wrapper_key"] = CryptHash.random() self.log.debug("New wrapper key: %s" % self.settings["wrapper_key"]) @@ -89,6 +245,10 @@ class Site(object): settings = json.load(open("%s/sites.json" % config.data_dir)).get(self.address) if settings: self.settings = settings + if "check_files_timestamp" not in settings: + settings["check_files_timestamp"] = 0 + if "verify_files_timestamp" not in settings: + settings["verify_files_timestamp"] = 0 if "cache" not in settings: settings["cache"] = {} if "size_files_optional" not in settings: @@ -104,8 +264,17 @@ class Site(object): self.bad_files[inner_path] = min(self.bad_files[inner_path], 20) else: self.settings = { - "own": False, "serving": True, "permissions": [], "cache": {"bad_files": {}}, "size_files_optional": 0, - "added": int(time.time()), "downloaded": None, "optional_downloaded": 0, "size_optional": 0 + "check_files_timestamp": 0, + "verify_files_timestamp": 0, + "own": False, + "serving": True, + "permissions": [], + "cache": {"bad_files": {}}, + "size_files_optional": 0, + "added": int(time.time()), + "downloaded": None, + "optional_downloaded": 0, + "size_optional": 0 } # Default if config.download_optional == "auto": self.settings["autodownloadoptional"] = True @@ -125,12 +294,38 @@ class Site(object): SiteManager.site_manager.load(False) SiteManager.site_manager.saveDelayed() + # Returns True if any site-related activity should be interrupted + # due to connection server being stopped or site being deleted + def isStopping(self): + return self.connection_server.stopping or self.settings.get("deleting", False) + + # Returns False if any network activity for the site should not happen def isServing(self): - if config.offline: + if self.connection_server.isOfflineMode(): + return False + elif self.isStopping(): return False else: return self.settings["serving"] + # Spawns a thread that will be waited for on server being stopped (and killed after a timeout). + # Short cut to self.greenlet_manager.spawn() + def spawn(self, *args, **kwargs): + thread = self.greenlet_manager.spawn(*args, **kwargs) + return thread + + # Spawns a thread that will be waited for on server being stopped (and killed after a timeout). 
+ # Short cut to self.greenlet_manager.spawnLater() + def spawnLater(self, *args, **kwargs): + thread = self.greenlet_manager.spawnLater(*args, **kwargs) + return thread + + def checkSendBackLRU(self, peer, inner_path, remote_modified): + return SiteManager.site_manager.checkSendBackLRU(self, peer, inner_path, remote_modified) + + def addToSendBackLRU(self, peer, inner_path, modified): + return SiteManager.site_manager.addToSendBackLRU(self, peer, inner_path, modified) + def getSettingsCache(self): back = {} back["bad_files"] = self.bad_files @@ -141,6 +336,29 @@ class Site(object): def getSizeLimit(self): return self.settings.get("size_limit", int(config.size_limit)) + def isFileVerificationExpired(self, check_files_interval, verify_files_interval): + now = time.time() + check_files_timestamp = self.settings.get("check_files_timestamp", 0) + verify_files_timestamp = self.settings.get("verify_files_timestamp", 0) + + if check_files_interval is None: + check_files_expiration = now + 1 + else: + check_files_expiration = check_files_timestamp + check_files_interval + + if verify_files_interval is None: + verify_files_expiration = now + 1 + else: + verify_files_expiration = verify_files_timestamp + verify_files_interval + + if verify_files_expiration < now: + return "verify" + + if check_files_expiration < now: + return "check" + + return False + # Next size limit based on current size def getNextSizeLimit(self): size_limits = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000, 100000] @@ -153,7 +371,7 @@ class Site(object): def isAddedRecently(self): return time.time() - self.settings.get("added", 0) < 60 * 60 * 24 - # Download all file from content.json + # Download all files from content.json def downloadContent(self, inner_path, download_files=True, peer=None, check_modifications=False, diffs={}): s = time.time() if config.verbose: @@ -243,7 +461,7 @@ class Site(object): # Optionals files if inner_path == "content.json": - gevent.spawn(self.updateHashfield) + self.spawn(self.updateHashfield) for file_relative_path in list(self.content_manager.contents[inner_path].get("files_optional", {}).keys()): file_inner_path = content_inner_dir + file_relative_path @@ -262,7 +480,7 @@ class Site(object): include_threads = [] for file_relative_path in list(self.content_manager.contents[inner_path].get("includes", {}).keys()): file_inner_path = content_inner_dir + file_relative_path - include_thread = gevent.spawn(self.downloadContent, file_inner_path, download_files=download_files, peer=peer) + include_thread = self.spawn(self.downloadContent, file_inner_path, download_files=download_files, peer=peer) include_threads.append(include_thread) if config.verbose: @@ -282,6 +500,11 @@ class Site(object): inner_path, time.time() - s, len(self.worker_manager.tasks) )) + + # If no file tasks have been started, worker_manager.checkComplete() + # never called. So call it explicitly. 
+ self.greenlet_manager.spawn(self.worker_manager.checkComplete) + return True # Return bad files with less than 3 retry @@ -337,9 +560,9 @@ class Site(object): ) if self.isAddedRecently(): - gevent.spawn(self.announce, mode="start", force=True) + self.spawn(self.announce, mode="start", force=True) else: - gevent.spawn(self.announce, mode="update") + self.spawn(self.announce, mode="update") if check_size: # Check the size first valid = self.downloadContent("content.json", download_files=False) # Just download content.json files @@ -391,14 +614,18 @@ class Site(object): self.log.debug("Ended downloadFile pool len: %s, skipped: %s" % (len(inner_paths), num_skipped)) # Update worker, try to find client that supports listModifications command - def updater(self, peers_try, queried, since): + def updater(self, peers_try, queried, need_queries, since): threads = [] while 1: - if not peers_try or len(queried) >= 3: # Stop after 3 successful query + if not peers_try or len(queried) >= need_queries: # Stop after 3 successful query break peer = peers_try.pop(0) + + if peer in queried: + continue + if config.verbose: - self.log.debug("CheckModifications: Try to get updates from: %s Left: %s" % (peer, peers_try)) + self.log.debug("CheckModifications: Trying to get updates from: %s Left: %s" % (peer, peers_try)) res = None with gevent.Timeout(20, exception=False): @@ -408,37 +635,94 @@ class Site(object): continue # Failed query queried.append(peer) + + modified_contents = [] - my_modified = self.content_manager.listModified(since) - num_old_files = 0 - for inner_path, modified in res["modified_files"].items(): # Check if the peer has newer files than we - has_newer = int(modified) > my_modified.get(inner_path, 0) - has_older = int(modified) < my_modified.get(inner_path, 0) - if inner_path not in self.bad_files and not self.content_manager.isArchived(inner_path, modified): - if has_newer: - # We dont have this file or we have older + send_back = [] + send_back_limit = config.send_back_limit + send_back_skipped = 0 + peer_modified_files = res["modified_files"] + my_modified_files = self.content_manager.listModified(since) + + inner_paths = itertools.chain(peer_modified_files.keys(), my_modified_files.keys()) + seen_inner_paths = {} + for inner_path in inner_paths: # Check if the peer has newer files than we have + if seen_inner_paths.get(inner_path, False): + continue + seen_inner_paths[inner_path] = True + + peer_modified = int(peer_modified_files.get(inner_path, 0)) + my_modified = int(my_modified_files.get(inner_path, 0)) + + diff = peer_modified - my_modified + if diff == 0: + continue + has_newer = diff > 0 + has_older = diff < 0 + + if inner_path not in self.bad_files and not self.content_manager.isArchived(inner_path, peer_modified): + if has_newer: # We don't have this file or we have older version modified_contents.append(inner_path) - self.bad_files[inner_path] = self.bad_files.get(inner_path, 0) + 1 - if has_older and num_old_files < 5: - num_old_files += 1 - self.log.debug("CheckModifications: %s client has older version of %s, publishing there (%s/5)..." 
% (peer, inner_path, num_old_files)) - gevent.spawn(self.publisher, inner_path, [peer], [], 1) + self.bad_files[inner_path] = self.bad_files.get(inner_path, 1) + if has_older: # The remote peer doesn't have this file or it has older version + if self.checkSendBackLRU(peer, inner_path, peer_modified): + send_back_skipped += 1 + else: + send_back.append(inner_path) + + inner_paths = None + seen_inner_paths = None + if modified_contents: - self.log.debug("CheckModifications: %s new modified file from %s" % (len(modified_contents), peer)) - modified_contents.sort(key=lambda inner_path: 0 - res["modified_files"][inner_path]) # Download newest first - t = gevent.spawn(self.pooledDownloadContent, modified_contents, only_if_bad=True) + self.log.info("CheckModifications: %s new modified files from %s" % (len(modified_contents), peer)) + modified_contents.sort(key=lambda inner_path: 0 - peer_modified_files[inner_path]) # Download newest first + for inner_path in modified_contents: + self.log.info("CheckModifications: %s: %s > %s" % ( + inner_path, peer_modified_files.get(inner_path, 0), my_modified_files.get(inner_path, 0) + )) + t = self.spawn(self.pooledDownloadContent, modified_contents, only_if_bad=True) threads.append(t) - if config.verbose: - self.log.debug("CheckModifications: Waiting for %s pooledDownloadContent" % len(threads)) + + if send_back: + self.log.info("CheckModifications: %s has older versions of %s files" % (peer, len(send_back))) + if len(send_back) > send_back_limit: + self.log.info("CheckModifications: choosing %s random files to publish back" % (send_back_limit)) + random.shuffle(send_back) + send_back = send_back[0:send_back_limit] + for inner_path in send_back: + self.log.info("CheckModifications: %s: %s < %s" % ( + inner_path, peer_modified_files.get(inner_path, 0), my_modified_files.get(inner_path, 0) + )) + self.spawn(self.publisher, inner_path, [peer], [], 1, save_to_send_back_lru=True) + + if send_back_skipped: + self.log.info("CheckModifications: %s has older versions of %s files, skipped according to send back LRU" % (peer, send_back_skipped)) + + self.log.debug("CheckModifications: Waiting for %s pooledDownloadContent" % len(threads)) gevent.joinall(threads) + # We need, with some rate, to perform the full check of modifications, + # "since the beginning of time", instead of the partial one. + def getForcedZeroSince(self): + now = time.time() + if self.fzs_timestamp + self.fzs_interval > now: + return False + self.fzs_count -= 1 + if self.fzs_count < 1: + self.fzs_count = random.randint(0, self.fzs_range) + self.fzs_timestamp = now + return True + return False + # Check modified content.json files from peers and add modified files to bad_files # Return: Successfully queried peers [Peer, Peer...] 
def checkModifications(self, since=None): s = time.time() peers_try = [] # Try these peers queried = [] # Successfully queried from these peers - limit = 5 + peer_limit = 10 + updater_limit = 3 + need_queries = 3 # Wait for peers if not self.peers: @@ -449,58 +733,116 @@ class Site(object): if self.peers: break - peers_try = self.getConnectedPeers() - peers_connected_num = len(peers_try) - if peers_connected_num < limit * 2: # Add more, non-connected peers if necessary - peers_try += self.getRecentPeers(limit * 5) + if since is None: + if self.getForcedZeroSince(): + since = 0 + else: + margin = 60 * 60 * 24 + since = self.settings.get("modified", margin) - margin - if since is None: # No since defined, download from last modification time-1day - since = self.settings.get("modified", 60 * 60 * 24) - 60 * 60 * 24 + if since == 0: + peer_limit *= 4 + need_queries *= 4 - if config.verbose: - self.log.debug( - "CheckModifications: Try to get listModifications from peers: %s, connected: %s, since: %s" % - (peers_try, peers_connected_num, since) - ) + peers_try = self.getConnectedPeers() + self.getConnectablePeers(peer_limit) + + self.log.debug( + "CheckModifications: Trying to get listModifications from %s peers, %s connected, since: %s" % + (len(peers_try), len(self.getConnectedPeers()), since) + ) updaters = [] - for i in range(3): - updaters.append(gevent.spawn(self.updater, peers_try, queried, since)) + for i in range(updater_limit): + updaters.append(self.spawn(self.updater, peers_try, queried, need_queries, since)) - gevent.joinall(updaters, timeout=10) # Wait 10 sec to workers done query modifications + for r in range(10): + gevent.joinall(updaters, timeout=5+r) + if len(queried) >= need_queries or len(peers_try) == 0: + break + self.log.debug("CheckModifications: Waiting... 
(%s) succesfully queried: %s, left: %s" % + (r + 1, len(queried), len(peers_try))) - if not queried: # Start another 3 thread if first 3 is stuck - peers_try[0:0] = [peer for peer in self.getConnectedPeers() if peer.connection.connected] # Add connected peers - for _ in range(10): - gevent.joinall(updaters, timeout=10) # Wait another 10 sec if none of updaters finished - if queried: - break - - self.log.debug("CheckModifications: Queried listModifications from: %s in %.3fs since %s" % (queried, time.time() - s, since)) + self.log.debug("CheckModifications: Queried listModifications from %s peers in %.3fs since %s" % ( + len(queried), time.time() - s, since)) time.sleep(0.1) return queried + def invalidateUpdateTime(self, invalid_interval): + a, b = invalid_interval + if b is None: + b = time.time() + if a is None: + a = b + if a <= self.last_online_update and self.last_online_update <= b: + self.last_online_update = 0 + self.log.debug("Update time invalidated") + + def isUpdateTimeValid(self): + if not self.last_online_update: + return False + expirationThreshold = 60 * 60 * 6 + return self.last_online_update > time.time() - expirationThreshold + + def refreshUpdateTime(self, valid=True): + if valid: + self.last_online_update = time.time() + else: + self.last_online_update = 0 + # Update content.json from peers and download changed files # Return: None @util.Noparallel() - def update(self, announce=False, check_files=False, since=None): + def update(self, announce=False, check_files=False, verify_files=False, since=None): + online = self.connection_server.isInternetOnline() + self.content_manager.loadContent("content.json", load_includes=False) # Reload content.json self.content_updated = None # Reset content updated time + if verify_files: + check_files = True + + if verify_files: + self.updateWebsocket(verifying=True) + elif check_files: + self.updateWebsocket(checking=True) if check_files: - self.storage.updateBadFiles(quick_check=True) # Quick check and mark bad files based on file size + if verify_files: + self.storage.updateBadFiles(quick_check=False) # Full-featured checksum verification + else: + self.storage.updateBadFiles(quick_check=True) # Quick check and mark bad files based on file size + # Don't update the timestamps in case of the application being shut down, + # so we can make another try next time. 
+ if not self.isStopping(): + self.settings["check_files_timestamp"] = time.time() + if verify_files: + self.settings["verify_files_timestamp"] = time.time() + + if verify_files: + self.updateWebsocket(verified=True) + elif check_files: + self.updateWebsocket(checked=True) if not self.isServing(): return False + if announce: + self.updateWebsocket(updating=True) + self.announce(mode="update", force=True) + + reqs = [ + self.peer_connector.newReq(4, 4, 30), + self.peer_connector.newReq(2, 2, 60), + self.peer_connector.newReq(1, 1, 120) + ] + nr_connected_peers = self.waitForPeers(reqs); + if nr_connected_peers < 1: + return + self.updateWebsocket(updating=True) # Remove files that no longer in content.json self.checkBadFiles() - if announce: - self.announce(mode="update", force=True) - # Full update, we can reset bad files if check_files and since == 0: self.bad_files = {} @@ -511,7 +853,7 @@ class Site(object): if self.bad_files: self.log.debug("Bad files: %s" % self.bad_files) - gevent.spawn(self.retryBadFiles, force=True) + self.spawn(self.retryBadFiles, force=True) if len(queried) == 0: # Failed to query modifications @@ -519,8 +861,19 @@ class Site(object): else: self.content_updated = time.time() + self.sendMyHashfield() + self.updateHashfield() + + online = online and self.connection_server.isInternetOnline() + self.refreshUpdateTime(valid=online) + self.updateWebsocket(updated=True) + # To be called from FileServer + @util.Noparallel(queue=True, ignore_args=True) + def update2(self, check_files=False, verify_files=False): + self.update(check_files=check_files, verify_files=verify_files) + # Update site by redownload all content.json def redownloadContents(self): # Download all content.json again @@ -532,7 +885,7 @@ class Site(object): gevent.joinall(content_threads) # Publish worker - def publisher(self, inner_path, peers, published, limit, diffs={}, event_done=None, cb_progress=None): + def publisher(self, inner_path, peers, published, limit, diffs={}, event_done=None, cb_progress=None, max_retries=2, save_to_send_back_lru=False): file_size = self.storage.getSize(inner_path) content_json_modified = self.content_manager.contents[inner_path]["modified"] body = self.storage.read(inner_path) @@ -557,7 +910,7 @@ class Site(object): timeout = 10 + int(file_size / 1024) result = {"exception": "Timeout"} - for retry in range(2): + for retry in range(max_retries): try: with gevent.Timeout(timeout, False): result = peer.publish(self.address, inner_path, body, content_json_modified, diffs) @@ -567,6 +920,12 @@ class Site(object): self.log.error("Publish error: %s" % Debug.formatException(err)) result = {"exception": Debug.formatException(err)} + # We add to the send back lru not only on success, but also on errors. + # Some peers returns None. (Why?) + # Anyway, we tried our best in delivering possibly lost updates. 
+ if save_to_send_back_lru: + self.addToSendBackLRU(peer, inner_path, content_json_modified) + if result and "ok" in result: published.append(peer) if cb_progress and len(published) <= limit: @@ -578,40 +937,75 @@ class Site(object): self.log.info("[FAILED] %s: %s" % (peer.key, result)) time.sleep(0.01) + def addBackgroundPublisher(self, published=[], limit=5, inner_path="content.json", diffs={}): + with self.background_publishers_lock: + if self.background_publishers.get(inner_path, None): + background_publisher = self.background_publishers[inner_path] + background_publisher.reinit(published=published, limit=limit, diffs=diffs) + else: + background_publisher = BackgroundPublisher(self, published=published, limit=limit, inner_path=inner_path, diffs=diffs) + self.background_publishers[inner_path] = background_publisher + + self.spawn(background_publisher.process) + + def processBackgroundPublishers(self): + with self.background_publishers_lock: + for inner_path, background_publisher in list(self.background_publishers.items()): + background_publisher.process() + if background_publisher.isComplete(): + background_publisher.finalize() + del self.background_publishers[inner_path] + + def getPeersForForegroundPublishing(self, limit): + # Wait for some peers to appear + reqs = [ + self.peer_connector.newReq(limit, limit / 2, 10), # some of them... + self.peer_connector.newReq(1, 1, 60) # or at least one... + ] + self.waitForPeers(reqs, update_websocket=False) + + peers = self.getConnectedPeers() + random.shuffle(peers) + + # Prefer newer clients. + # Trying to deliver foreground updates to the latest version clients, + # expecting that they have better networking facilities. + # Note: background updates SHOULD NOT discriminate peers by their rev number, + # otherwise it can cause troubles in delivering updates to older versions. + peers = sorted(peers, key=lambda peer: peer.connection.handshake.get("rev", 0) < config.rev - 100) + + # Add more, non-connected peers if necessary + if len(peers) < limit * 2 and len(self.peers) > len(peers): + peers += self.getRecentPeers(limit * 2) + peers = set(peers) + + return peers + # Update content.json on peers @util.Noparallel() def publish(self, limit="default", inner_path="content.json", diffs={}, cb_progress=None): published = [] # Successfully published (Peer) publishers = [] # Publisher threads - if not self.peers: - self.announce(mode="more") - if limit == "default": limit = 5 threads = limit - peers = self.getConnectedPeers() - num_connected_peers = len(peers) - - random.shuffle(peers) - peers = sorted(peers, key=lambda peer: peer.connection.handshake.get("rev", 0) < config.rev - 100) # Prefer newer clients - - if len(peers) < limit * 2 and len(self.peers) > len(peers): # Add more, non-connected peers if necessary - peers += self.getRecentPeers(limit * 2) - - peers = set(peers) + peers = self.getPeersForForegroundPublishing(limit) self.log.info("Publishing %s to %s/%s peers (connected: %s) diffs: %s (%.2fk)..." 
% ( - inner_path, limit, len(self.peers), num_connected_peers, list(diffs.keys()), float(len(str(diffs))) / 1024 + inner_path, + limit, len(self.peers), len(self.getConnectedPeers()), + list(diffs.keys()), float(len(str(diffs))) / 1024 )) if not peers: + self.addBackgroundPublisher(published=published, limit=limit, inner_path=inner_path, diffs=diffs) return 0 # No peers found event_done = gevent.event.AsyncResult() for i in range(min(len(peers), limit, threads)): - publisher = gevent.spawn(self.publisher, inner_path, peers, published, limit, diffs, event_done, cb_progress) + publisher = self.spawn(self.publisher, inner_path, peers, published, limit, diffs, event_done, cb_progress) publishers.append(publisher) event_done.get() # Wait for done @@ -620,17 +1014,16 @@ class Site(object): if len(published) == 0: gevent.joinall(publishers) # No successful publish, wait for all publisher - # Publish more peers in the backgroup + # Publish to more peers in the background self.log.info( - "Published %s to %s peers, publishing to %s more peers in the background" % - (inner_path, len(published), limit) + "Published %s to %s peers, publishing to more peers in the background" % + (inner_path, len(published)) ) - for thread in range(2): - gevent.spawn(self.publisher, inner_path, peers, published, limit=limit * 2, diffs=diffs) + self.addBackgroundPublisher(published=published, limit=limit, inner_path=inner_path, diffs=diffs) # Send my hashfield to every connected peer if changed - gevent.spawn(self.sendMyHashfield, 100) + self.spawn(self.sendMyHashfield, 100) return len(published) @@ -793,7 +1186,7 @@ class Site(object): if not self.content_manager.contents.get("content.json"): # No content.json, download it first! self.log.debug("Need content.json first (inner_path: %s, priority: %s)" % (inner_path, priority)) if priority > 0: - gevent.spawn(self.announce) + self.spawn(self.announce) if inner_path != "content.json": # Prevent double download task = self.worker_manager.addTask("content.json", peer) task["evt"].get() @@ -819,7 +1212,7 @@ class Site(object): self.log.debug("%s: Download not allowed" % inner_path) return False - self.bad_files[inner_path] = self.bad_files.get(inner_path, 0) + 1 # Mark as bad file + self.bad_files[inner_path] = self.bad_files.get(inner_path, 1) # Mark as bad file task = self.worker_manager.addTask(inner_path, peer, priority=priority, file_info=file_info) if blocking: @@ -847,118 +1240,239 @@ class Site(object): peer = Peer(ip, port, self) self.peers[key] = peer peer.found(source) + + self.peer_connector.processReqs() + self.peer_connector.addPeer(peer) + return peer + # Called from peer.remove to erase links to peer + def deregisterPeer(self, peer): + self.peers.pop(peer.key, None) + try: + self.peers_recent.remove(peer) + except: + pass + self.peer_connector.deregisterPeer(peer) + def announce(self, *args, **kwargs): if self.isServing(): self.announcer.announce(*args, **kwargs) - # Keep connections to get the updates - def needConnections(self, num=None, check_site_on_reconnect=False): + def getActivityRating(self, force_safe=False): + age = time.time() - self.settings.get("modified", 0) + + if age < 60 * 60: + rating = 1.0 + elif age < 60 * 60 * 5: + rating = 0.8 + elif age < 60 * 60 * 24: + rating = 0.6 + elif age < 60 * 60 * 24 * 3: + rating = 0.4 + elif age < 60 * 60 * 24 * 7: + rating = 0.2 + else: + rating = 0.0 + + force_safe = force_safe or config.expose_no_ownership + + if (not force_safe) and self.settings["own"]: + rating = min(rating, 0.6) + + if 
self.content_updated is False: # Last check modification failed + rating += 0.1 + elif self.bad_files: + rating += 0.1 + + if rating > 1.0: + rating = 1.0 + + return rating + + def getAnnounceRating(self): + # rare frequent + # announces announces + # 0 ------------------- 1 + # activity -------------> -- active site ==> frequent announces + # <---------------- peers -- many peers ==> rare announces + # trackers -------------> -- many trackers ==> frequent announces to iterate over more trackers + + activity_rating = self.getActivityRating(force_safe=True) + + peer_count = len(self.peers) + peer_rating = 1.0 - min(peer_count, 50) / 50.0 + + tracker_count = self.announcer.getSupportedTrackerCount() + tracker_count = max(tracker_count, 1) + tracker_rating = 1.0 - (1.0 / tracker_count) + + v = [activity_rating, peer_rating, tracker_rating] + return sum(v) / float(len(v)) + + def getPreferableConnectablePeerCount(self): + if not self.isServing(): + return 0 + + count = lerp( + config.site_connectable_peer_count_min, + config.site_connectable_peer_count_max, + self.getActivityRating(force_safe=True)) + return count + + # The engine tries to maintain the number of active connections: + # >= getPreferableActiveConnectionCount() + # and + # <= getActiveConnectionCountLimit() + + def getPreferableActiveConnectionCount(self): + if not self.isServing(): + return 0 + + age = time.time() - self.settings.get("modified", 0) + count = int(10 * self.getActivityRating(force_safe=True)) + + if len(self.peers) < 50: + count = max(count, 5) + + count = min(count, config.connected_limit) + + return count + + def getActiveConnectionCountLimit(self): + count_above_preferable = 2 + limit = self.getPreferableActiveConnectionCount() + count_above_preferable + limit = min(limit, config.connected_limit) + return limit + + ############################################################################ + + # Returns the maximum value of current reqs for connections + def waitingForConnections(self): + self.peer_connector.processReqs() + return self.peer_connector.need_nr_connected_peers + + def needConnections(self, num=None, update_site_on_reconnect=False): + if not self.connection_server.allowsCreatingConnections(): + return + if num is None: - if len(self.peers) < 50: - num = 3 - else: - num = 6 - need = min(len(self.peers), num, config.connected_limit) # Need 5 peer, but max total peers + num = self.getPreferableActiveConnectionCount() + num = min(len(self.peers), num) - connected = len(self.getConnectedPeers()) + req = self.peer_connector.newReq(0, num) + return req - connected_before = connected + # Wait for peers to be discovered and/or connected according to reqs + # and send updates to the UI + def waitForPeers(self, reqs, update_websocket=True): + if not reqs: + return 0 + i = 0 + nr_connected_peers = -1 + while self.isServing(): + ready_reqs = list(filter(lambda req: req.ready(), reqs)) + if len(ready_reqs) == len(reqs): + if nr_connected_peers < 0: + nr_connected_peers = ready_reqs[0].nr_connected_peers + break + waiting_reqs = list(filter(lambda req: not req.ready(), reqs)) + if not waiting_reqs: + break + waiting_req = waiting_reqs[0] + #self.log.debug("waiting_req: %s %s %s", waiting_req.need_nr_connected_peers, waiting_req.nr_connected_peers, waiting_req.expiration_interval) + waiting_req.waitHeartbeat(timeout=1.0) + if i > 0 and nr_connected_peers != waiting_req.nr_connected_peers: + nr_connected_peers = waiting_req.nr_connected_peers + if update_websocket: + 
self.updateWebsocket(connecting_to_peers=nr_connected_peers) + i += 1 + if update_websocket: + self.updateWebsocket(connected_to_peers=max(nr_connected_peers, 0)) + if i > 1: + # If we waited some time, pause now for displaying connected_to_peers message in the UI. + # This sleep is solely needed for site status updates on ZeroHello to be more cool-looking. + gevent.sleep(1) + return nr_connected_peers - self.log.debug("Need connections: %s, Current: %s, Total: %s" % (need, connected, len(self.peers))) + ############################################################################ - if connected < need: # Need more than we have - for peer in self.getRecentPeers(30): - if not peer.connection or not peer.connection.connected: # No peer connection or disconnected - peer.pex() # Initiate peer exchange - if peer.connection and peer.connection.connected: - connected += 1 # Successfully connected - if connected >= need: - break - self.log.debug( - "Connected before: %s, after: %s. Check site: %s." % - (connected_before, connected, check_site_on_reconnect) - ) - - if check_site_on_reconnect and connected_before == 0 and connected > 0 and self.connection_server.has_internet: - gevent.spawn(self.update, check_files=False) - - return connected - - # Return: Probably peers verified to be connectable recently + # Return: Peers verified to be connectable recently, or if not enough, other peers as well def getConnectablePeers(self, need_num=5, ignore=[], allow_private=True): peers = list(self.peers.values()) - found = [] + random.shuffle(peers) + connectable_peers = [] + reachable_peers = [] for peer in peers: - if peer.key.endswith(":0"): - continue # Not connectable - if not peer.connection: - continue # No connection - if peer.ip.endswith(".onion") and not self.connection_server.tor_manager.enabled: - continue # Onion not supported if peer.key in ignore: - continue # The requester has this peer - if time.time() - peer.connection.last_recv_time > 60 * 60 * 2: # Last message more than 2 hours ago - peer.connection = None # Cleanup: Dead connection continue if not allow_private and helper.isPrivateIp(peer.ip): continue - found.append(peer) - if len(found) >= need_num: + if peer.isConnectable(): + connectable_peers.append(peer) + elif peer.isReachable(): + reachable_peers.append(peer) + if len(connectable_peers) >= need_num: break # Found requested number of peers - if len(found) < need_num: # Return not that good peers - found += [ - peer for peer in peers - if not peer.key.endswith(":0") and - peer.key not in ignore and - (allow_private or not helper.isPrivateIp(peer.ip)) - ][0:need_num - len(found)] + if len(connectable_peers) < need_num: # Return not that good peers + connectable_peers += reachable_peers[0:need_num - len(connectable_peers)] - return found + return connectable_peers # Return: Recently found peers + def getReachablePeers(self): + return [peer for peer in self.peers.values() if peer.isReachable()] + + # Return: Recently found peers, sorted by reputation. 
+ # If there not enough recently found peers, adds other known peers with highest reputation def getRecentPeers(self, need_num): - found = list(set(self.peers_recent)) + need_num = int(need_num) + found = set(self.peers_recent) self.log.debug( "Recent peers %s of %s (need: %s)" % (len(found), len(self.peers), need_num) ) - if len(found) >= need_num or len(found) >= len(self.peers): - return sorted( - found, + if len(found) < need_num and len(found) < len(self.peers): + # Add random peers + peers = self.getReachablePeers() + peers = sorted( + list(peers), key=lambda peer: peer.reputation, reverse=True - )[0:need_num] + ) + while len(found) < need_num and len(peers) > 0: + found.add(peers.pop()) - # Add random peers - need_more = need_num - len(found) - if not self.connection_server.tor_manager.enabled: - peers = [peer for peer in self.peers.values() if not peer.ip.endswith(".onion")] - else: - peers = list(self.peers.values()) - - found_more = sorted( - peers[0:need_more * 50], + return sorted( + list(found), key=lambda peer: peer.reputation, reverse=True - )[0:need_more * 2] - - found += found_more + )[0:need_num] return found[0:need_num] - def getConnectedPeers(self): + # Returns the list of connected peers + # By default the result may contain peers chosen optimistically: + # If the connection is being established and 20 seconds have not yet passed + # since the connection start time, those peers are included in the result. + # Set only_fully_connected=True for restricting only by fully connected peers. + def getConnectedPeers(self, only_fully_connected=False): back = [] if not self.connection_server: return [] tor_manager = self.connection_server.tor_manager for connection in self.connection_server.connections: + if len(back) >= len(self.peers): # short cut for breaking early; no peers to check left + break + if not connection.connected and time.time() - connection.start_time > 20: # Still not connected after 20s continue + if not connection.connected and only_fully_connected: # Only fully connected peers + continue + peer = self.peers.get("%s:%s" % (connection.ip, connection.port)) if peer: if connection.ip.endswith(".onion") and connection.target_onion and tor_manager.start_onions: @@ -971,61 +1485,158 @@ class Site(object): back.append(peer) return back - # Cleanup probably dead peers and close connection if too much - def cleanupPeers(self, peers_protected=[]): + def removeDeadPeers(self): peers = list(self.peers.values()) - if len(peers) > 20: - # Cleanup old peers - removed = 0 - if len(peers) > 1000: - ttl = 60 * 60 * 1 - else: - ttl = 60 * 60 * 4 + if len(peers) <= 20: + return - for peer in peers: - if peer.connection and peer.connection.connected: - continue - if peer.connection and not peer.connection.connected: - peer.connection = None # Dead connection - if time.time() - peer.time_found > ttl: # Not found on tracker or via pex in last 4 hour - peer.remove("Time found expired") - removed += 1 - if removed > len(peers) * 0.1: # Don't remove too much at once - break + removed = 0 + if len(peers) > 10000: + ttl = 60 * 2 + elif len(peers) > 1000: + ttl = 60 * 60 * 1 + elif len(peers) > 100: + ttl = 60 * 60 * 4 + else: + ttl = 60 * 60 * 8 - if removed: - self.log.debug("Cleanup peers result: Removed %s, left: %s" % (removed, len(self.peers))) + for peer in peers: + if peer.isConnected() or peer.isProtected(): + continue + if peer.isTtlExpired(ttl): + peer.remove("TTL expired") + removed += 1 + if removed > len(peers) * 0.1: # Don't remove too much at once + break - # Close peers 
over the limit - closed = 0 - connected_peers = [peer for peer in self.getConnectedPeers() if peer.connection.connected] # Only fully connected peers - need_to_close = len(connected_peers) - config.connected_limit + if removed: + self.log.debug("Cleanup peers result: Removed %s, left: %s" % (removed, len(self.peers))) - if closed < need_to_close: - # Try to keep connections with more sites + # Cleanup probably dead peers and close connection if too much + def cleanupPeers(self): + self.removeDeadPeers() + + limit = max(self.getActiveConnectionCountLimit(), self.waitingForConnections()) + connected_peers = self.getConnectedPeers(only_fully_connected=True) + need_to_close = len(connected_peers) - limit + + if need_to_close > 0: + closed = 0 for peer in sorted(connected_peers, key=lambda peer: min(peer.connection.sites, 5)): - if not peer.connection: + if not peer.isConnected(): continue - if peer.key in peers_protected: + if peer.isProtected(): continue if peer.connection.sites > 5: break - peer.connection.close("Cleanup peers") - peer.connection = None + peer.disconnect("Cleanup peers") closed += 1 if closed >= need_to_close: break - if need_to_close > 0: - self.log.debug("Connected: %s, Need to close: %s, Closed: %s" % (len(connected_peers), need_to_close, closed)) + self.log.debug("Connected: %s, Need to close: %s, Closed: %s" % ( + len(connected_peers), need_to_close, closed)) + + def lookForConnectablePeers(self): + num_tries = 2 + need_connectable_peers = self.getPreferableConnectablePeerCount() + + connectable_peers = 0 + reachable_peers = [] + + for peer in list(self.peers.values()): + if peer.isConnected() or peer.isConnectable(): + connectable_peers += 1 + elif peer.isReachable(): + reachable_peers.append(peer) + if connectable_peers >= need_connectable_peers: + return True + + random.shuffle(reachable_peers) + + for peer in reachable_peers: + if peer.isConnected() or peer.isConnectable() or peer.removed: + continue + peer.ping() + if peer.isConnected(): + peer.pex() + num_tries -= 1 + if num_tries < 1: + break + + @util.Noparallel(queue=True) + def runPeriodicMaintenance(self, startup=False, force=False): + if not self.isServing(): + return False + + self.log.debug("runPeriodicMaintenance: startup=%s, force=%s" % (startup, force)) + + result = False + + for handler in self.periodic_maintenance_handlers: + result = result | bool(handler.run(startup=startup, force=force)) + + return result + + def periodicMaintenanceHandler_general(self, startup=False, force=False): + if not self.isServing(): + return False + + if not self.peers: + return False + + self.log.debug("periodicMaintenanceHandler_general: startup=%s, force=%s" % (startup, force)) + + #self.persistent_peer_req = self.needConnections(update_site_on_reconnect=True) + #self.persistent_peer_req.result_connected.wait(timeout=2.0) + + #self.announcer.announcePex() + + self.processBackgroundPublishers() + + self.update() + + return True + + def periodicMaintenanceHandler_peer_check(self, startup=False, force=False): + if not self.isServing(): + return False + + if not self.peers: + return False + + self.log.debug("periodicMaintenanceHandler_peer_check: startup=%s, force=%s" % (startup, force)) + + if not startup: + self.cleanupPeers() + + self.lookForConnectablePeers() + + return True + + def periodicMaintenanceHandler_announce(self, startup=False, force=False): + if not self.isServing(): + return False + + self.log.debug("periodicMaintenanceHandler_announce: startup=%s, force=%s" % (startup, force)) + + if startup and 
len(self.peers) < 10: + self.announce(mode="startup") + else: + self.announce(mode="update", pex=False) + + return True # Send hashfield to peers def sendMyHashfield(self, limit=5): + if not self.isServing(): + return False + if not self.content_manager.hashfield: # No optional files return False sent = 0 - connected_peers = self.getConnectedPeers() + connected_peers = self.getConnectedPeers(only_fully_connected=True) for peer in connected_peers: if peer.sendMyHashfield(): sent += 1 @@ -1038,13 +1649,16 @@ class Site(object): # Update hashfield def updateHashfield(self, limit=5): + if not self.isServing(): + return False + # Return if no optional files if not self.content_manager.hashfield and not self.content_manager.has_optional_files: return False s = time.time() queried = 0 - connected_peers = self.getConnectedPeers() + connected_peers = self.getConnectedPeers(only_fully_connected=True) for peer in connected_peers: if peer.time_hashfield: continue @@ -1082,10 +1696,10 @@ class Site(object): # Add event listeners def addEventListeners(self): - self.onFileStart = util.Event() # If WorkerManager added new task + self.onFileStart = util.Event() # If WorkerManager added new task self.onFileDone = util.Event() # If WorkerManager successfully downloaded a file self.onFileFail = util.Event() # If WorkerManager failed to download a file - self.onComplete = util.Event() # All file finished + self.onComplete = util.Event() # All files finished self.onFileStart.append(lambda inner_path: self.fileStarted()) # No parameters to make Noparallel batching working self.onFileDone.append(lambda inner_path: self.fileDone(inner_path)) @@ -1099,6 +1713,7 @@ class Site(object): param = None for ws in self.websockets: ws.event("siteChanged", self, param) + time.sleep(0.001) def messageWebsocket(self, message, type="info", progress=None): for ws in self.websockets: @@ -1113,7 +1728,7 @@ class Site(object): time.sleep(0.001) # Wait for other files adds self.updateWebsocket(file_started=True) - # File downloaded successful + # File downloaded successfully def fileDone(self, inner_path): # File downloaded, remove it from bad files if inner_path in self.bad_files: diff --git a/src/Site/SiteAnnouncer.py b/src/Site/SiteAnnouncer.py index 2fd63e82..1baf39af 100644 --- a/src/Site/SiteAnnouncer.py +++ b/src/Site/SiteAnnouncer.py @@ -1,6 +1,7 @@ import random import time import hashlib +import logging import re import collections @@ -12,6 +13,7 @@ from Debug import Debug from util import helper from greenlet import GreenletExit import util +from util import CircularIterator class AnnounceError(Exception): @@ -24,11 +26,20 @@ global_stats = collections.defaultdict(lambda: collections.defaultdict(int)) class SiteAnnouncer(object): def __init__(self, site): self.site = site + self.log = logging.getLogger("Site:%s SiteAnnouncer" % self.site.address_short) + self.stats = {} self.fileserver_port = config.fileserver_port self.peer_id = self.site.connection_server.peer_id - self.last_tracker_id = random.randint(0, 10) + self.tracker_circular_iterator = CircularIterator() self.time_last_announce = 0 + self.supported_tracker_count = 0 + + # Returns connection_server rela + # Since 0.8.0 + @property + def connection_server(self): + return self.site.connection_server def getTrackers(self): return config.trackers @@ -36,25 +47,76 @@ class SiteAnnouncer(object): def getSupportedTrackers(self): trackers = self.getTrackers() - if not self.site.connection_server.tor_manager.enabled: + if not self.connection_server.tor_manager.enabled: 
trackers = [tracker for tracker in trackers if ".onion" not in tracker] trackers = [tracker for tracker in trackers if self.getAddressParts(tracker)] # Remove trackers with unknown address - if "ipv6" not in self.site.connection_server.supported_ip_types: - trackers = [tracker for tracker in trackers if helper.getIpType(self.getAddressParts(tracker)["ip"]) != "ipv6"] + if "ipv6" not in self.connection_server.supported_ip_types: + trackers = [tracker for tracker in trackers if self.connection_server.getIpType(self.getAddressParts(tracker)["ip"]) != "ipv6"] return trackers - def getAnnouncingTrackers(self, mode): + # Returns a cached value of len(self.getSupportedTrackers()), which can be + # inacurate. + # To be used from Site for estimating available tracker count. + def getSupportedTrackerCount(self): + return self.supported_tracker_count + + def shouldTrackerBeTemporarilyIgnored(self, tracker, mode, force): + if not tracker: + return True + + if force: + return False + + now = time.time() + + # Throttle accessing unresponsive trackers + tracker_stats = global_stats[tracker] + delay = min(30 * tracker_stats["num_error"], 60 * 10) + time_announce_allowed = tracker_stats["time_request"] + delay + if now < time_announce_allowed: + return True + + return False + + def getAnnouncingTrackers(self, mode, force): trackers = self.getSupportedTrackers() - if trackers and (mode == "update" or mode == "more"): # Only announce on one tracker, increment the queried tracker id - self.last_tracker_id += 1 - self.last_tracker_id = self.last_tracker_id % len(trackers) - trackers_announcing = [trackers[self.last_tracker_id]] # We only going to use this one + self.supported_tracker_count = len(trackers) + + if trackers and (mode == "update" or mode == "more"): + + # Choose just 2 trackers to announce to + + trackers_announcing = [] + + # One is the next in sequence + + self.tracker_circular_iterator.resetSuccessiveCount() + while 1: + tracker = self.tracker_circular_iterator.next(trackers) + if not self.shouldTrackerBeTemporarilyIgnored(tracker, mode, force): + trackers_announcing.append(tracker) + break + if self.tracker_circular_iterator.isWrapped(): + break + + # And one is just random + + shuffled_trackers = random.sample(trackers, len(trackers)) + for tracker in shuffled_trackers: + if tracker in trackers_announcing: + continue + if not self.shouldTrackerBeTemporarilyIgnored(tracker, mode, force): + trackers_announcing.append(tracker) + break else: - trackers_announcing = trackers + trackers_announcing = [ + tracker for tracker in trackers + if not self.shouldTrackerBeTemporarilyIgnored(tracker, mode, force) + ] return trackers_announcing @@ -62,94 +124,32 @@ class SiteAnnouncer(object): back = [] # Type of addresses they can reach me if config.trackers_proxy == "disable" and config.tor != "always": - for ip_type, opened in list(self.site.connection_server.port_opened.items()): + for ip_type, opened in list(self.connection_server.port_opened.items()): if opened: back.append(ip_type) - if self.site.connection_server.tor_manager.start_onions: + if self.connection_server.tor_manager.start_onions: back.append("onion") return back - @util.Noparallel(blocking=False) + @util.Noparallel() def announce(self, force=False, mode="start", pex=True): + if not self.site.isServing(): + return + if time.time() - self.time_last_announce < 30 and not force: return # No reannouncing within 30 secs - if force: - self.site.log.debug("Force reannounce in mode %s" % mode) + + self.log.debug("announce: force=%s, mode=%s, 
pex=%s" % (force, mode, pex)) self.fileserver_port = config.fileserver_port self.time_last_announce = time.time() - trackers = self.getAnnouncingTrackers(mode) - - if config.verbose: - self.site.log.debug("Tracker announcing, trackers: %s" % trackers) - - errors = [] - slow = [] - s = time.time() - threads = [] - num_announced = 0 - - for tracker in trackers: # Start announce threads - tracker_stats = global_stats[tracker] - # Reduce the announce time for trackers that looks unreliable - time_announce_allowed = time.time() - 60 * min(30, tracker_stats["num_error"]) - if tracker_stats["num_error"] > 5 and tracker_stats["time_request"] > time_announce_allowed and not force: - if config.verbose: - self.site.log.debug("Tracker %s looks unreliable, announce skipped (error: %s)" % (tracker, tracker_stats["num_error"])) - continue - thread = self.site.greenlet_manager.spawn(self.announceTracker, tracker, mode=mode) - threads.append(thread) - thread.tracker = tracker - - time.sleep(0.01) - self.updateWebsocket(trackers="announcing") - - gevent.joinall(threads, timeout=20) # Wait for announce finish - - for thread in threads: - if thread.value is None: - continue - if thread.value is not False: - if thread.value > 1.0: # Takes more than 1 second to announce - slow.append("%.2fs %s" % (thread.value, thread.tracker)) - num_announced += 1 - else: - if thread.ready(): - errors.append(thread.tracker) - else: # Still running - slow.append("30s+ %s" % thread.tracker) - - # Save peers num - self.site.settings["peers"] = len(self.site.peers) - - if len(errors) < len(threads): # At least one tracker finished - if len(trackers) == 1: - announced_to = trackers[0] - else: - announced_to = "%s/%s trackers" % (num_announced, len(threads)) - if mode != "update" or config.verbose: - self.site.log.debug( - "Announced in mode %s to %s in %.3fs, errors: %s, slow: %s" % - (mode, announced_to, time.time() - s, errors, slow) - ) - else: - if len(threads) > 1: - self.site.log.error("Announce to %s trackers in %.3fs, failed" % (len(threads), time.time() - s)) - if len(threads) == 1 and mode != "start": # Move to next tracker - self.site.log.debug("Tracker failed, skipping to next one...") - self.site.greenlet_manager.spawnLater(1.0, self.announce, force=force, mode=mode, pex=pex) - - self.updateWebsocket(trackers="announced") + trackers = self.getAnnouncingTrackers(mode, force) + self.log.debug("Chosen trackers: %s" % trackers) + self.announceToTrackers(trackers, force=force, mode=mode) if pex: - self.updateWebsocket(pex="announcing") - if mode == "more": # Need more peers - self.announcePex(need_num=10) - else: - self.announcePex() - - self.updateWebsocket(pex="announced") + self.announcePex() def getTrackerHandler(self, protocol): return None @@ -177,7 +177,7 @@ class SiteAnnouncer(object): s = time.time() address_parts = self.getAddressParts(tracker) if not address_parts: - self.site.log.warning("Tracker %s error: Invalid address" % tracker) + self.log.warning("Tracker %s error: Invalid address" % tracker) return False if tracker not in self.stats: @@ -188,7 +188,7 @@ class SiteAnnouncer(object): self.stats[tracker]["time_request"] = time.time() global_stats[tracker]["time_request"] = time.time() if config.verbose: - self.site.log.debug("Tracker announcing to %s (mode: %s)" % (tracker, mode)) + self.log.debug("Tracker announcing to %s (mode: %s)" % (tracker, mode)) if mode == "update": num_want = 10 else: @@ -202,7 +202,7 @@ class SiteAnnouncer(object): else: raise AnnounceError("Unknown protocol: %s" % 
address_parts["protocol"]) except Exception as err: - self.site.log.warning("Tracker %s announce failed: %s in mode %s" % (tracker, Debug.formatException(err), mode)) + self.log.warning("Tracker %s announce failed: %s in mode %s" % (tracker, Debug.formatException(err), mode)) error = err if error: @@ -210,11 +210,11 @@ class SiteAnnouncer(object): self.stats[tracker]["time_status"] = time.time() self.stats[tracker]["last_error"] = str(error) self.stats[tracker]["time_last_error"] = time.time() - if self.site.connection_server.has_internet: + if self.connection_server.has_internet: self.stats[tracker]["num_error"] += 1 self.stats[tracker]["num_request"] += 1 global_stats[tracker]["num_request"] += 1 - if self.site.connection_server.has_internet: + if self.connection_server.has_internet: global_stats[tracker]["num_error"] += 1 self.updateWebsocket(tracker="error") return False @@ -249,39 +249,106 @@ class SiteAnnouncer(object): self.site.updateWebsocket(peers_added=added) if config.verbose: - self.site.log.debug( + self.log.debug( "Tracker result: %s://%s (found %s peers, new: %s, total: %s)" % (address_parts["protocol"], address_parts["address"], len(peers), added, len(self.site.peers)) ) return time.time() - s - @util.Noparallel(blocking=False) - def announcePex(self, query_num=2, need_num=5): - peers = self.site.getConnectedPeers() - if len(peers) == 0: # Wait 3s for connections - time.sleep(3) - peers = self.site.getConnectedPeers() + def announceToTrackers(self, trackers, force=False, mode="start"): + errors = [] + slow = [] + s = time.time() + threads = [] + num_announced = 0 - if len(peers) == 0: # Small number of connected peers for this site, connect to any - peers = list(self.site.getRecentPeers(20)) - need_num = 10 + for tracker in trackers: # Start announce threads + thread = self.site.greenlet_manager.spawn(self.announceTracker, tracker, mode=mode) + threads.append(thread) + thread.tracker = tracker - random.shuffle(peers) - done = 0 - total_added = 0 - for peer in peers: - num_added = peer.pex(need_num=need_num) - if num_added is not False: - done += 1 - total_added += num_added - if num_added: - self.site.worker_manager.onPeers() - self.site.updateWebsocket(peers_added=num_added) + time.sleep(0.01) + self.updateWebsocket(trackers="announcing") + + gevent.joinall(threads, timeout=20) # Wait for announce finish + + for thread in threads: + if thread.value is None: + continue + if thread.value is not False: + if thread.value > 1.0: # Takes more than 1 second to announce + slow.append("%.2fs %s" % (thread.value, thread.tracker)) + num_announced += 1 else: + if thread.ready(): + errors.append(thread.tracker) + else: # Still running + slow.append("30s+ %s" % thread.tracker) + + # Save peers num + self.site.settings["peers"] = len(self.site.peers) + + if len(errors) < len(threads): # At least one tracker finished + if len(trackers) == 1: + announced_to = trackers[0] + else: + announced_to = "%s/%s trackers" % (num_announced, len(threads)) + if mode != "update" or config.verbose: + self.log.debug( + "Announced in mode %s to %s in %.3fs, errors: %s, slow: %s" % + (mode, announced_to, time.time() - s, errors, slow) + ) + else: + if len(threads) > 1: + self.log.error("Announce to %s trackers in %.3fs, failed" % (len(threads), time.time() - s)) + if len(threads) > 1 and mode != "start": # Move to next tracker + self.log.debug("Tracker failed, skipping to next one...") + self.site.greenlet_manager.spawnLater(5.0, self.announce, force=force, mode=mode, pex=False) + + 
self.updateWebsocket(trackers="announced") + + @util.Noparallel(blocking=False) + def announcePex(self, query_num=2, need_num=10, establish_connections=True): + peers = [] + try: + peer_count = 20 + query_num * 2 + + # Wait for some peers to connect + for _ in range(5): + if not self.site.isServing(): + return + peers = self.site.getConnectedPeers(only_fully_connected=True) + if len(peers) > 0: + break + time.sleep(2) + + if len(peers) < peer_count and establish_connections: + # Small number of connected peers for this site, connect to any + peers = list(self.site.getRecentPeers(peer_count)) + + if len(peers) > 0: + self.updateWebsocket(pex="announcing") + + random.shuffle(peers) + done = 0 + total_added = 0 + for peer in peers: + if not establish_connections and not peer.isConnected(): + continue + num_added = peer.pex(need_num=need_num) + if num_added is not False: + done += 1 + total_added += num_added + if num_added: + self.site.worker_manager.onPeers() + self.site.updateWebsocket(peers_added=num_added) + if done == query_num: + break time.sleep(0.1) - if done == query_num: - break - self.site.log.debug("Pex result: from %s peers got %s new peers." % (done, total_added)) + self.log.debug("Pex result: from %s peers got %s new peers." % (done, total_added)) + finally: + if len(peers) > 0: + self.updateWebsocket(pex="announced") def updateWebsocket(self, **kwargs): if kwargs: diff --git a/src/Site/SiteHelpers.py b/src/Site/SiteHelpers.py new file mode 100644 index 00000000..90a298cf --- /dev/null +++ b/src/Site/SiteHelpers.py @@ -0,0 +1,256 @@ +import time +import weakref +import gevent + +class ConnectRequirement(object): + next_id = 1 + def __init__(self, need_nr_peers, need_nr_connected_peers, expiration_interval=None): + self.need_nr_peers = need_nr_peers # how many total peers we need + self.need_nr_connected_peers = need_nr_connected_peers # how many connected peers we need + self.result = gevent.event.AsyncResult() # resolves on need_nr_peers condition + self.result_connected = gevent.event.AsyncResult() # resolves on need_nr_connected_peers condition + + self.expiration_interval = expiration_interval + self.expired = False + if expiration_interval: + self.expire_at = time.time() + expiration_interval + else: + self.expire_at = None + + self.nr_peers = -1 # updated PeerConnector() + self.nr_connected_peers = -1 # updated PeerConnector() + + self.heartbeat = gevent.event.AsyncResult() + + self.id = type(self).next_id + type(self).next_id += 1 + + def fulfilled(self): + return self.result.ready() and self.result_connected.ready() + + def ready(self): + return self.expired or self.fulfilled() + + # Heartbeat sent when any of the following happens: + # * self.result is set + # * self.result_connected is set + # * self.nr_peers changed + # * self.nr_peers_connected changed + # * self.expired is set + def waitHeartbeat(self, timeout=None): + if self.heartbeat.ready(): + self.heartbeat = gevent.event.AsyncResult() + return self.heartbeat.wait(timeout=timeout) + + def sendHeartbeat(self): + self.heartbeat.set_result() + if self.heartbeat.ready(): + self.heartbeat = gevent.event.AsyncResult() + +class PeerConnector(object): + + def __init__(self, site): + self.site = site + + self.peer_reqs = weakref.WeakValueDictionary() # How many connected peers we need. + # Separate entry for each requirement. + # Objects of type ConnectRequirement. + self.peer_connector_controller = None # Thread doing the orchestration in background. 
+ self.peer_connector_workers = dict() # Threads trying to connect to individual peers. + self.peer_connector_worker_limit = 5 # Max nr of workers. + self.peer_connector_announcer = None # Thread doing announces in background. + + # Max effective values. Set by processReqs(). + self.need_nr_peers = 0 + self.need_nr_connected_peers = 0 + self.nr_peers = 0 # set by processReqs() + self.nr_connected_peers = 0 # set by processReqs2() + + # Connector Controller state + self.peers = list() + + def addReq(self, req): + self.peer_reqs[req.id] = req + self.processReqs() + + def newReq(self, need_nr_peers, need_nr_connected_peers, expiration_interval=None): + req = ConnectRequirement(need_nr_peers, need_nr_connected_peers, expiration_interval=expiration_interval) + self.addReq(req) + return req + + def processReqs(self, nr_connected_peers=None): + nr_peers = len(self.site.peers) + self.nr_peers = nr_peers + + need_nr_peers = 0 + need_nr_connected_peers = 0 + + items = list(self.peer_reqs.items()) + for key, req in items: + send_heartbeat = False + + if req.expire_at and req.expire_at < time.time(): + req.expired = True + self.peer_reqs.pop(key, None) + send_heartbeat = True + elif req.result.ready() and req.result_connected.ready(): + pass + else: + if nr_connected_peers is not None: + if req.need_nr_peers <= nr_peers and req.need_nr_connected_peers <= nr_connected_peers: + req.result.set_result(nr_peers) + req.result_connected.set_result(nr_connected_peers) + send_heartbeat = True + if req.nr_peers != nr_peers or req.nr_connected_peers != nr_connected_peers: + req.nr_peers = nr_peers + req.nr_connected_peers = nr_connected_peers + send_heartbeat = True + + if not (req.result.ready() and req.result_connected.ready()): + need_nr_peers = max(need_nr_peers, req.need_nr_peers) + need_nr_connected_peers = max(need_nr_connected_peers, req.need_nr_connected_peers) + + if send_heartbeat: + req.sendHeartbeat() + + self.need_nr_peers = need_nr_peers + self.need_nr_connected_peers = need_nr_connected_peers + + if nr_connected_peers is None: + nr_connected_peers = 0 + if need_nr_peers > nr_peers: + self.spawnPeerConnectorAnnouncer(); + if need_nr_connected_peers > nr_connected_peers: + self.spawnPeerConnectorController(); + + def processReqs2(self): + self.nr_connected_peers = len(self.site.getConnectedPeers(only_fully_connected=True)) + self.processReqs(nr_connected_peers=self.nr_connected_peers) + + # For adding new peers when ConnectorController is working. + # While it is iterating over a cached list of peers, there can be a significant lag + # for a newly discovered peer to get in sight of the controller. + # Suppose most previously known peers are dead and we've just get a few + # new peers from a tracker. + # So we mix the new peer to the cached list. + # When ConnectorController is stopped (self.peers is empty), we just do nothing here. 
+ def addPeer(self, peer): + if not self.peers: + return + if peer not in self.peers: + self.peers.append(peer) + + def deregisterPeer(self, peer): + try: + self.peers.remove(peer) + except: + pass + + def sleep(self, t): + self.site.connection_server.sleep(t) + + def keepGoing(self): + return self.site.isServing() and self.site.connection_server.allowsCreatingConnections() + + def peerConnectorWorker(self, peer): + if not peer.isConnected(): + peer.connect() + if peer.isConnected(): + peer.ping() + self.processReqs2() + + def peerConnectorController(self): + self.peers = list() + addendum = 20 + while self.keepGoing(): + + no_peers_loop = 0 + while len(self.site.peers) < 1: + # No peers at all. + # Waiting for the announcer to discover some peers. + self.sleep(10 + no_peers_loop) + no_peers_loop += 1 + if not self.keepGoing() or no_peers_loop > 60: + break + + self.processReqs2() + + if self.need_nr_connected_peers <= self.nr_connected_peers: + # Ok, nobody waits for connected peers. + # Done. + break + + if len(self.site.peers) < 1: + break + + if len(self.peers) < 1: + # refill the peer list + self.peers = self.site.getRecentPeers(self.need_nr_connected_peers * 2 + self.nr_connected_peers + addendum) + addendum = min(addendum * 2 + 50, 10000) + if len(self.peers) <= self.nr_connected_peers: + # Looks like all known peers are connected. + # Waiting for the announcer to discover some peers. + self.site.announcer.announcePex(establish_connections=False) + self.sleep(10) + continue + + added = 0 + + # try connecting to peers + while self.keepGoing() and len(self.peer_connector_workers) < self.peer_connector_worker_limit: + if len(self.peers) < 1: + break + + peer = self.peers.pop(0) + + if peer.isConnected(): + continue + + thread = self.peer_connector_workers.get(peer, None) + if thread: + continue + + thread = self.site.spawn(self.peerConnectorWorker, peer) + self.peer_connector_workers[peer] = thread + thread.link(lambda thread, peer=peer: self.peer_connector_workers.pop(peer, None)) + added += 1 + + if not self.keepGoing(): + break + + if not added: + # Looks like all known peers are either connected or being connected, + # so we weren't able to start connecting any peer in this iteration. + # Waiting for the announcer to discover some peers. 
+ self.sleep(20) + + # wait for more room in self.peer_connector_workers + while self.keepGoing() and len(self.peer_connector_workers) >= self.peer_connector_worker_limit: + self.sleep(2) + + if not self.site.connection_server.isInternetOnline(): + self.sleep(30) + + self.peers = list() + self.peer_connector_controller = None + + def peerConnectorAnnouncer(self): + while self.keepGoing(): + if self.need_nr_peers <= self.nr_peers: + break + self.site.announce(mode="more") + self.processReqs2() + if self.need_nr_peers <= self.nr_peers: + break + self.sleep(10) + if not self.site.connection_server.isInternetOnline(): + self.sleep(20) + self.peer_connector_announcer = None + + def spawnPeerConnectorController(self): + if self.peer_connector_controller is None or self.peer_connector_controller.ready(): + self.peer_connector_controller = self.site.spawn(self.peerConnectorController) + + def spawnPeerConnectorAnnouncer(self): + if self.peer_connector_announcer is None or self.peer_connector_announcer.ready(): + self.peer_connector_announcer = self.site.spawn(self.peerConnectorAnnouncer) diff --git a/src/Site/SiteManager.py b/src/Site/SiteManager.py index 684d69fc..8175a1f5 100644 --- a/src/Site/SiteManager.py +++ b/src/Site/SiteManager.py @@ -4,6 +4,7 @@ import re import os import time import atexit +import collections import gevent @@ -27,6 +28,21 @@ class SiteManager(object): gevent.spawn(self.saveTimer) atexit.register(lambda: self.save(recalculate_size=True)) + # ZeroNet has a bug of desyncing between: + # * time sent in a response of listModified + # and + # * time checked on receiving a file. + # This leads to the following scenario: + # * Request listModified. + # * Detect that the remote peer missing an update + # * Send a newer version of the file back to the peer. + # * The peer responses "ok: File not changed" + # ..... + # * Request listModified the next time and do all the same again. + # So we keep the list of sent back entries to prevent sending multiple useless updates: + # "{site.address} - {peer.key} - {inner_path}" -> mtime + self.send_back_lru = collections.OrderedDict() + # Load all sites from data/sites.json @util.Noparallel() def load(self, cleanup=True, startup=False): @@ -155,6 +171,11 @@ class SiteManager(object): def resolveDomainCached(self, domain): return self.resolveDomain(domain) + # Checks if the address is blocked. To be implemented in content filter plugins. + # Since 0.8.0 + def isAddressBlocked(self, address): + return False + # Return: Site object or None if not found def get(self, address): if self.isDomainCached(address): @@ -216,6 +237,23 @@ class SiteManager(object): self.load(startup=True) return self.sites + # Return False if we never sent <inner_path> to <peer> + # or if the file that was sent was older than <remote_modified> + # so that send back logic is suppressed for <inner_path>. + # True if <inner_path> can be sent back to <peer>. 
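# --- Editor's note (illustrative sketch, not part of the patch) -------------
# The SiteManager comments above describe a bounded LRU of
# "{site.address} - {peer.key} - {inner_path}" -> mtime entries used to avoid
# pushing the same update back to a peer over and over. The standalone sketch
# below restates that bookkeeping; the 5000-entry cap is an illustrative
# assumption (the patch uses config.send_back_lru_size), and shouldSkip()
# mirrors how Site.updater() treats checkSendBackLRU(): a True result means
# the send back is suppressed. The real checkSendBackLRU()/addToSendBackLRU()
# methods follow directly below this note.
import collections

class SendBackLRU:
    def __init__(self, max_size=5000):
        self.max_size = max_size
        self.entries = collections.OrderedDict()   # key -> modified time we already sent back

    def makeKey(self, site_address, peer_key, inner_path):
        return "%s - %s - %s" % (site_address, peer_key, inner_path)

    def shouldSkip(self, site_address, peer_key, inner_path, remote_modified):
        # Suppress the send back if we already pushed a version at least as new as the peer's.
        sent_modified = self.entries.get(self.makeKey(site_address, peer_key, inner_path), 0)
        return remote_modified < sent_modified

    def add(self, site_address, peer_key, inner_path, modified):
        key = self.makeKey(site_address, peer_key, inner_path)
        if key in self.entries:
            self.entries.move_to_end(key, last=True)   # refresh recency, keep the recorded mtime
        else:
            self.entries[key] = modified
            while len(self.entries) > self.max_size:
                self.entries.popitem(last=False)       # evict the least recently used entry
# -----------------------------------------------------------------------------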
+ def checkSendBackLRU(self, site, peer, inner_path, remote_modified): + key = site.address + ' - ' + peer.key + ' - ' + inner_path + sent_modified = self.send_back_lru.get(key, 0) + return remote_modified < sent_modified + + def addToSendBackLRU(self, site, peer, inner_path, modified): + key = site.address + ' - ' + peer.key + ' - ' + inner_path + if self.send_back_lru.get(key, None) is None: + self.send_back_lru[key] = modified + while len(self.send_back_lru) > config.send_back_lru_size: + self.send_back_lru.popitem(last=False) + else: + self.send_back_lru.move_to_end(key, last=True) site_manager = SiteManager() # Singletone diff --git a/src/Site/SiteStorage.py b/src/Site/SiteStorage.py index c12a80b0..97f720dc 100644 --- a/src/Site/SiteStorage.py +++ b/src/Site/SiteStorage.py @@ -24,6 +24,25 @@ thread_pool_fs_read = ThreadPool.ThreadPool(config.threads_fs_read, name="FS rea thread_pool_fs_write = ThreadPool.ThreadPool(config.threads_fs_write, name="FS write") thread_pool_fs_batch = ThreadPool.ThreadPool(1, name="FS batch") +class VerifyFiles_Notificator(object): + def __init__(self, site, quick_check): + self.site = site + self.quick_check = quick_check + self.scanned_files = 0 + self.websocket_update_interval = 0.25 + self.websocket_update_time = time.time() + + def inc(self): + self.scanned_files += 1 + if self.websocket_update_time + self.websocket_update_interval < time.time(): + self.send() + + def send(self): + self.websocket_update_time = time.time() + if self.quick_check: + self.site.updateWebsocket(checking=self.scanned_files) + else: + self.site.updateWebsocket(verifying=self.scanned_files) @PluginManager.acceptPlugins class SiteStorage(object): @@ -356,7 +375,7 @@ class SiteStorage(object): # Reopen DB to check changes if self.has_db: self.closeDb("New dbschema") - gevent.spawn(self.getDb) + self.site.spawn(self.getDb) elif not config.disable_db and should_load_to_db and self.has_db: # Load json file to db if config.verbose: self.log.debug("Loading json file to db: %s (file: %s)" % (inner_path, file)) @@ -420,6 +439,8 @@ class SiteStorage(object): return inner_path # Verify all files sha512sum using content.json + # The result may not be accurate if self.site.isStopping(). + # verifyFiles() return immediately in that case. def verifyFiles(self, quick_check=False, add_optional=False, add_changed=True): bad_files = [] back = defaultdict(int) @@ -431,17 +452,55 @@ class SiteStorage(object): self.log.debug("VerifyFile content.json not exists") self.site.needFile("content.json", update=True) # Force update to fix corrupt file self.site.content_manager.loadContent() # Reload content.json - for content_inner_path, content in list(self.site.content_manager.contents.items()): + + # Trying to read self.site.content_manager.contents without being stuck + # on reading the long file list and also without getting + # "RuntimeError: dictionary changed size during iteration" + # We can't use just list(iteritems()) since it loads all the contents files + # at once and gets unresponsive. 
+ contents = {} + notificator = None + tries = 0 + max_tries = 40 + stop = False + while not stop: + try: + contents = {} + notificator = VerifyFiles_Notificator(self.site, quick_check) + for content_inner_path, content in self.site.content_manager.contents.iteritems(): + notificator.inc() + contents[content_inner_path] = content + if self.site.isStopping(): + stop = True + break + stop = True + except RuntimeError as err: + if "changed size during iteration" in str(err): + tries += 1 + if tries >= max_tries: + self.log.info("contents.json file list changed during iteration. %s tries done. Giving up.", tries) + stop = True + self.log.info("contents.json file list changed during iteration. Trying again... (%s)", tries) + time.sleep(2 * tries) + else: + stop = True + + for content_inner_path, content in contents.items(): back["num_content"] += 1 i += 1 if i % 50 == 0: time.sleep(0.001) # Context switch to avoid gevent hangs + + if self.site.isStopping(): + break + if not os.path.isfile(self.getPath(content_inner_path)): # Missing content.json file back["num_content_missing"] += 1 self.log.debug("[MISSING] %s" % content_inner_path) bad_files.append(content_inner_path) for file_relative_path in list(content.get("files", {}).keys()): + notificator.inc() back["num_file"] += 1 file_inner_path = helper.getDirname(content_inner_path) + file_relative_path # Relative to site dir file_inner_path = file_inner_path.strip("/") # Strip leading / @@ -452,14 +511,19 @@ class SiteStorage(object): bad_files.append(file_inner_path) continue + err = None + if quick_check: - ok = os.path.getsize(file_path) == content["files"][file_relative_path]["size"] + file_size = os.path.getsize(file_path) + expected_size = content["files"][file_relative_path]["size"] + ok = file_size == expected_size if not ok: - err = "Invalid size" + err = "Invalid size: %s - actual, %s - expected" % (file_size, expected_size) else: try: ok = self.site.content_manager.verifyFile(file_inner_path, open(file_path, "rb")) - except Exception as err: + except Exception as err2: + err = err2 ok = False if not ok: @@ -472,6 +536,7 @@ class SiteStorage(object): optional_added = 0 optional_removed = 0 for file_relative_path in list(content.get("files_optional", {}).keys()): + notificator.inc() back["num_optional"] += 1 file_node = content["files_optional"][file_relative_path] file_inner_path = helper.getDirname(content_inner_path) + file_relative_path # Relative to site dir @@ -516,6 +581,8 @@ class SiteStorage(object): (content_inner_path, len(content["files"]), quick_check, optional_added, optional_removed) ) + notificator.send() + self.site.content_manager.contents.db.processDelayed() time.sleep(0.001) # Context switch to avoid gevent hangs return back diff --git a/src/Ui/UiServer.py b/src/Ui/UiServer.py index 61943ada..bda1daa0 100644 --- a/src/Ui/UiServer.py +++ b/src/Ui/UiServer.py @@ -167,7 +167,7 @@ class UiServer: self.log.error("Web interface bind error, must be running already, exiting.... 
%s" % err) import main main.file_server.stop() - self.log.debug("Stopped.") + self.log.info("Stopped.") def stop(self): self.log.debug("Stopping...") diff --git a/src/Ui/UiWebsocket.py b/src/Ui/UiWebsocket.py index 9865a1f1..60d746db 100644 --- a/src/Ui/UiWebsocket.py +++ b/src/Ui/UiWebsocket.py @@ -318,6 +318,7 @@ class UiWebsocket(object): back["updatesite"] = config.updatesite back["dist_type"] = config.dist_type back["lib_verify_best"] = CryptBitcoin.lib_verify_best + back["passive_mode"] = file_server.passive_mode return back def formatAnnouncerInfo(self, site): @@ -912,9 +913,9 @@ class UiWebsocket(object): self.response(to, "ok") # Update site content.json - def actionSiteUpdate(self, to, address, check_files=False, since=None, announce=False): + def actionSiteUpdate(self, to, address, check_files=False, verify_files=False, since=None, announce=False): def updateThread(): - site.update(announce=announce, check_files=check_files, since=since) + site.update(announce=announce, check_files=check_files, verify_files=verify_files, since=since) self.response(to, "Updated") site = self.server.sites.get(address) @@ -1164,6 +1165,32 @@ class UiWebsocket(object): file_server.portCheck() self.response(to, file_server.port_opened) + @flag.admin + @flag.no_multiuser + def actionServerSetPassiveMode(self, to, passive_mode=False): + import main + file_server = main.file_server + if file_server.isPassiveMode() != passive_mode: + file_server.setPassiveMode(passive_mode) + if file_server.isPassiveMode(): + self.cmd("notification", ["info", _["Passive mode enabled"], 5000]) + else: + self.cmd("notification", ["info", _["Passive mode disabled"], 5000]) + self.server.updateWebsocket() + + @flag.admin + @flag.no_multiuser + def actionServerSetOfflineMode(self, to, offline_mode=False): + import main + file_server = main.file_server + if file_server.isOfflineMode() != offline_mode: + file_server.setOfflineMode(offline_mode) + if file_server.isOfflineMode(): + self.cmd("notification", ["info", _["Offline mode enabled"], 5000]) + else: + self.cmd("notification", ["info", _["Offline mode disabled"], 5000]) + self.server.updateWebsocket() + @flag.admin @flag.no_multiuser def actionServerShutdown(self, to, restart=False): @@ -1174,7 +1201,7 @@ class UiWebsocket(object): return False if restart: main.restart_after_shutdown = True - main.file_server.stop() + main.file_server.stop(ui_websocket=self) main.ui_server.stop() if restart: diff --git a/src/loglevel_overrides.py b/src/loglevel_overrides.py new file mode 100644 index 00000000..5622e523 --- /dev/null +++ b/src/loglevel_overrides.py @@ -0,0 +1,9 @@ +# This file is for adding rules for selectively enabling debug logging +# when working on the code. +# Add your rules here and skip this file when committing changes. 
+ +#import re +#from util import SelectiveLogger +# +#SelectiveLogger.addLogLevelRaisingRule("ConnServer") +#SelectiveLogger.addLogLevelRaisingRule(re.compile(r'^Site:')) diff --git a/src/main.py b/src/main.py index 6ba85052..21424d47 100644 --- a/src/main.py +++ b/src/main.py @@ -4,6 +4,7 @@ import sys import stat import time import logging +import loglevel_overrides startup_errors = [] def startupError(msg): @@ -154,7 +155,7 @@ class Actions(object): logging.info("Starting servers....") gevent.joinall([gevent.spawn(ui_server.start), gevent.spawn(file_server.start)]) - logging.info("All server stopped") + logging.info("All servers stopped") # Site commands diff --git a/src/util/CircularIterator.py b/src/util/CircularIterator.py new file mode 100644 index 00000000..3466092e --- /dev/null +++ b/src/util/CircularIterator.py @@ -0,0 +1,34 @@ +import random + +class CircularIterator: + def __init__(self): + self.successive_count = 0 + self.last_size = 0 + self.index = -1 + + def next(self, items): + self.last_size = len(items) + + if self.last_size == 0: + return None + + if self.index < 0: + self.index = random.randint(0, self.last_size) + else: + self.index += 1 + + self.index = self.index % self.last_size + + self.successive_count += 1 + + return items[self.index] + + def resetSuccessiveCount(self): + self.successive_count = 0 + + def getSuccessiveCount(self): + return self.successive_count + + def isWrapped(self): + return self.successive_count >= self.last_size + diff --git a/src/util/GreenletManager.py b/src/util/GreenletManager.py index e024233d..d711d09a 100644 --- a/src/util/GreenletManager.py +++ b/src/util/GreenletManager.py @@ -3,17 +3,37 @@ from Debug import Debug class GreenletManager: - def __init__(self): + # pool is either gevent.pool.Pool or GreenletManager. + # if pool is None, new gevent.pool.Pool() is created. + def __init__(self, pool=None): self.greenlets = set() + if not pool: + pool = gevent.pool.Pool(None) + self.pool = pool + + def _spawn_later(self, seconds, *args, **kwargs): + # If pool is another GreenletManager, delegate to it. + if hasattr(self.pool, 'spawnLater'): + return self.pool.spawnLater(seconds, *args, **kwargs) + + # There's gevent.spawn_later(), but there isn't gevent.pool.Pool.spawn_later(). + # Doing manually. 
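+        # Create the greenlet from the pool's greenlet_class so the pool can track it,
+        # register it with the pool, then schedule it to start after the delay.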
+ greenlet = self.pool.greenlet_class(*args, **kwargs) + self.pool.add(greenlet) + greenlet.start_later(seconds) + return greenlet + + def _spawn(self, *args, **kwargs): + return self.pool.spawn(*args, **kwargs) def spawnLater(self, *args, **kwargs): - greenlet = gevent.spawn_later(*args, **kwargs) + greenlet = self._spawn_later(*args, **kwargs) greenlet.link(lambda greenlet: self.greenlets.remove(greenlet)) self.greenlets.add(greenlet) return greenlet def spawn(self, *args, **kwargs): - greenlet = gevent.spawn(*args, **kwargs) + greenlet = self._spawn(*args, **kwargs) greenlet.link(lambda greenlet: self.greenlets.remove(greenlet)) self.greenlets.add(greenlet) return greenlet diff --git a/src/util/SafeRe.py b/src/util/SafeRe.py index 6018e2d3..8c394a84 100644 --- a/src/util/SafeRe.py +++ b/src/util/SafeRe.py @@ -1,10 +1,16 @@ import re +import logging + +log = logging.getLogger("SafeRe") + class UnsafePatternError(Exception): pass +max_cache_size = 1000 cached_patterns = {} +old_cached_patterns = {} def isSafePattern(pattern): @@ -15,18 +21,78 @@ def isSafePattern(pattern): if unsafe_pattern_match: raise UnsafePatternError("Potentially unsafe part of the pattern: %s in %s" % (unsafe_pattern_match.group(0), pattern)) - repetitions = re.findall(r"\.[\*\{\+]", pattern) - if len(repetitions) >= 10: - raise UnsafePatternError("More than 10 repetitions of %s in %s" % (repetitions[0], pattern)) + repetitions1 = re.findall(r"\.[\*\{\+]", pattern) + repetitions2 = re.findall(r"[^(][?]", pattern) + if len(repetitions1) + len(repetitions2) >= 10: + raise UnsafePatternError("More than 10 repetitions in %s" % pattern) return True -def match(pattern, *args, **kwargs): +def compilePattern(pattern): + global cached_patterns + global old_cached_patterns + cached_pattern = cached_patterns.get(pattern) if cached_pattern: - return cached_pattern.match(*args, **kwargs) - else: - if isSafePattern(pattern): - cached_patterns[pattern] = re.compile(pattern) - return cached_patterns[pattern].match(*args, **kwargs) + return cached_pattern + + cached_pattern = old_cached_patterns.get(pattern) + if cached_pattern: + del old_cached_patterns[pattern] + cached_patterns[pattern] = cached_pattern + return cached_pattern + + if isSafePattern(pattern): + cached_pattern = re.compile(pattern) + cached_patterns[pattern] = cached_pattern + log.debug("Compiled new pattern: %s" % pattern) + log.debug("Cache size: %d + %d" % (len(cached_patterns), len(old_cached_patterns))) + + if len(cached_patterns) > max_cache_size: + old_cached_patterns = cached_patterns + cached_patterns = {} + log.debug("Size limit reached. Rotating cache.") + log.debug("Cache size: %d + %d" % (len(cached_patterns), len(old_cached_patterns))) + + return cached_pattern + + +def match(pattern, *args, **kwargs): + cached_pattern = compilePattern(pattern) + return cached_pattern.match(*args, **kwargs) + +################################################################################ + +# TESTS + +def testSafePattern(pattern): + try: + return isSafePattern(pattern) + except UnsafePatternError as err: + return False + + +# Some real examples to make sure it works as expected +assert testSafePattern('(data/mp4/.*|updater/.*)') +assert testSafePattern('((js|css)/(?!all.(js|css)))|.git') + + +# Unsafe cases: + +# ((?!json).)*$ not allowed, because of ) before the * character. 
Possible fix: .*(?!json)$ +assert not testSafePattern('((?!json).)*$') +assert testSafePattern('.*(?!json)$') + +# (.*.epub|.*.jpg|.*.jpeg|.*.png|data/.*.gif|.*.avi|.*.ogg|.*.webm|.*.mp4|.*.mp3|.*.mkv|.*.eot) not allowed, because it has 12 .* repetition patterns. Possible fix: .*(epub|jpg|jpeg|png|data/gif|avi|ogg|webm|mp4|mp3|mkv|eot) +assert not testSafePattern('(.*.epub|.*.jpg|.*.jpeg|.*.png|data/.*.gif|.*.avi|.*.ogg|.*.webm|.*.mp4|.*.mp3|.*.mkv|.*.eot)') +assert testSafePattern('.*(epub|jpg|jpeg|png|data/gif|avi|ogg|webm|mp4|mp3|mkv|eot)') + +# https://github.com/HelloZeroNet/ZeroNet/issues/2757 +assert not testSafePattern('a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa') +assert not testSafePattern('a?a?a?a?a?a?a?x.{0,1}x.{0,1}x.{0,1}') +assert testSafePattern('a?a?a?a?a?a?a?x.{0,1}x.{0,1}') +assert not testSafePattern('a?a?a?a?a?a?a?x.*x.*x.*') +assert testSafePattern('a?a?a?a?a?a?a?x.*x.*') + +################################################################################ diff --git a/src/util/SelectiveLogger.py b/src/util/SelectiveLogger.py new file mode 100644 index 00000000..fcdcba0a --- /dev/null +++ b/src/util/SelectiveLogger.py @@ -0,0 +1,43 @@ +import logging +import re + +log_level_raising_rules = [] + +def addLogLevelRaisingRule(rule, level=None): + if level is None: + level = logging.INFO + log_level_raising_rules.append({ + "rule": rule, + "level": level + }) + +def matchLogLevelRaisingRule(name): + for rule in log_level_raising_rules: + if isinstance(rule["rule"], re.Pattern): + if rule["rule"].search(name): + return rule["level"] + else: + if rule["rule"] == name: + return rule["level"] + return None + +class SelectiveLogger(logging.getLoggerClass()): + def __init__(self, name, level=logging.NOTSET): + return super().__init__(name, level) + + def raiseLevel(self, level): + raised_level = matchLogLevelRaisingRule(self.name) + if raised_level is not None: + if level < raised_level: + level = raised_level + return level + + def isEnabledFor(self, level): + level = self.raiseLevel(level) + return super().isEnabledFor(level) + + def _log(self, level, msg, args, **kwargs): + level = self.raiseLevel(level) + return super()._log(level, msg, args, **kwargs) + +logging.setLoggerClass(SelectiveLogger) diff --git a/src/util/__init__.py b/src/util/__init__.py index ab8a8b88..f00c1459 100644 --- a/src/util/__init__.py +++ b/src/util/__init__.py @@ -1,4 +1,5 @@ from .Cached import Cached +from .CircularIterator import CircularIterator from .Event import Event from .Noparallel import Noparallel from .Pooled import Pooled diff --git a/src/util/helper.py b/src/util/helper.py index 61455b08..f44bcfce 100644 --- a/src/util/helper.py +++ b/src/util/helper.py @@ -290,7 +290,8 @@ local_ip_pattern = re.compile(r"^127\.|192\.168\.|10\.|172\.1[6-9]\.|172\.2[0-9] def isPrivateIp(ip): return local_ip_pattern.match(ip) - +# XXX: Deprecated. Use ConnectionServer.getIpType() instead. +# To be removed in 0.9.0 def getIpType(ip): if ip.endswith(".onion"): return "onion"