"""
    SleekXMPP: The Sleek XMPP Library
    Copyright (C) 2010  Nathanael C. Fritz
    This file is part of SleekXMPP.

    See the file license.txt for copying permission.
"""
from __future__ import with_statement, unicode_literals
try:
import queue
except ImportError:
import Queue as queue
2009-06-03 22:56:51 +00:00
from . import statemachine
from . stanzabase import StanzaBase
from xml.etree import cElementTree
from xml.parsers import expat
import logging
2010-06-02 18:18:09 +00:00
import random
2009-06-03 22:56:51 +00:00
import socket
import threading
2009-06-03 22:56:51 +00:00
import time
import traceback
import types
import xml.sax.saxutils
2010-05-27 01:32:28 +00:00
from . import scheduler
2009-06-03 22:56:51 +00:00
# Number of 'eventhandle<N>' worker threads started by XMLStream.process().
HANDLER_THREADS = 1

# The ssl module is imported unconditionally, so support is always reported.
# The ImportError fallback that would clear this flag is commented out.
ssl_support = True
#try:
import ssl
#except ImportError:
#    ssl_support = False

import sys

if sys.version_info < (3, 0):
    # Python 2 needs the package's own socket/file wrappers
    # (monkey patch for the broken filesocket object).
    from . import filesocket
    #socket._fileobject = filesocket.filesocket
2009-06-03 22:56:51 +00:00
class RestartStream(Exception):
    """Signal that the stream header must be sent again.

    The receive loop catches this exception and restarts the stream without
    tearing down the connection (used after TLS has been negotiated).
    """
# Module-wide registry: stanza class -> list of extensions registered for it
# (filled by XMLStream.registerStanzaExtension).
stanza_extensions = {}

# Reconnection back-off tuning used by XMLStream.connectTCP.
# Cap applied to the back-off delay (seconds) before jitter is added.
RECONNECT_MAX_DELAY = 3600
# Multiplier applied to the delay after every failed attempt.
RECONNECT_QUIESCE_FACTOR = 1.6180339887498948 # Phi
# Relative standard deviation of the random jitter added to the delay.
RECONNECT_QUIESCE_JITTER = 0.11962656472 # molar Planck constant times c, joule meter/mole
2009-06-03 22:56:51 +00:00
class XMLStream(object):
    """A connection manager with XML events.

    Owns the socket to the server, parses the incoming XML stream into
    stanzas, hands them to registered handlers through an event queue and
    writes outgoing data from a prioritised send queue.
    """

    def __init__(self, socket=None, host='', port=0, escape_quotes=False):
        """Create a stream, optionally around an already connected socket.

        socket        -- connected socket to adopt, or None.
        host, port    -- default server address used by connect().
        escape_quotes -- when true, xmlesc() also escapes double quotes.
        """
        self.ssl_support = ssl_support
        self.escape_quotes = escape_quotes
        self.state = statemachine.StateMachine(('disconnected', 'connected'))
        self.should_reconnect = True

        # Initialise before setSocket(): setSocket() stores the file wrapper
        # of a supplied socket here, and resetting it afterwards would
        # silently throw that wrapper away.
        self.filesocket = None
        self.setSocket(socket)
        self.address = (host, int(port))

        self.__thread = {}
        self.__root_stanza = []
        self.__stanza = {}
        self.__stanza_extension = {}
        self.__handlers = []

        self.__tls_socket = None
        self.use_ssl = False
        self.use_tls = False
        self.ca_certs = None

        self.stream_header = "<stream>"
        self.stream_footer = "</stream>"

        self.eventqueue = queue.Queue()
        self.sendqueue = queue.PriorityQueue()
        # Sequence number stored in every send queue entry so that entries of
        # equal priority leave the queue in insertion order.
        self.__send_seq = 0
        self.__send_lock = threading.Lock()
        self.scheduler = scheduler.Scheduler(self.eventqueue)

        self.namespace_map = {}

        self.run = True

    def setSocket(self, socket):
        """Adopt `socket` as the stream's socket.

        When `socket` is not None the state moves from 'disconnected' to
        'connected' and an unbuffered binary file wrapper is created for the
        parser.  Raises Exception('Already connected') if that transition
        cannot be made.
        """
        self.socket = socket
        if socket is None:
            return
        with self.state.transition_ctx('disconnected', 'connected') as locked:
            if not locked:
                raise Exception('Already connected')
            # ElementTree.iterparse requires a file.  0 buffer files have to be binary
            self.filesocket = socket.makefile('rb', 0)

    def setFileSocket(self, filesocket):
        """Replace the file object the XML parser reads from."""
        self.filesocket = filesocket

    def connect(self, host='', port=0, use_ssl=None, use_tls=None):
        """Establish a socket connection to the given XMPP server.

        Returns True when the 'disconnected' -> 'connected' transition
        (performed by connectTCP) succeeds, False otherwise.
        """
        # TODO currently a caller can't distinguish between "connection failed" and
        # "we're already trying to connect from another thread"
        connected = self.state.transition(
            'disconnected', 'connected',
            func=self.connectTCP, args=[host, port, use_ssl, use_tls])
        if connected:
            logging.debug('Connection complete.')
            return True
        if self.state['connected']:
            logging.debug('Already connected')
        else:
            logging.warning("Connection failed")
        return False

    def connectTCP(self, host='', port=0, use_ssl=None, use_tls=None, reattempt=True):
        """Connect and create socket.

        Retries with a growing, jittered delay until the connection succeeds
        or self.run becomes false.  With reattempt=False a single failure
        returns False immediately.  Returns True on success.
        """
        # Note that this is thread-safe by merit of being called solely from connect() which
        # holds the state lock.
        delay = 1.0  # reconnection delay
        while self.run:
            logging.debug('connecting....')
            sock = None
            try:
                if host and port:
                    self.address = (host, int(port))
                if use_ssl is not None:
                    self.use_ssl = use_ssl
                if use_tls is not None:
                    # TODO this variable doesn't seem to be used for anything!
                    self.use_tls = use_tls
                if sys.version_info < (3, 0):
                    sock = filesocket.Socket26(socket.AF_INET, socket.SOCK_STREAM)
                else:
                    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                sock.settimeout(None)  #10)

                if self.use_ssl and self.ssl_support:
                    logging.debug("Socket Wrapped for SSL")
                    sock = ssl.wrap_socket(sock, ca_certs=self.ca_certs)

                self.socket = sock
                sock.connect(self.address)
                self.filesocket = sock.makefile('rb', 0)
                return True
            except socket.error as serr:
                logging.exception("Socket Error #%s: %s", serr.errno, serr.strerror)
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit can stop the retry loop.
                logging.exception("Connection error")

            # Release the descriptor of the failed attempt; otherwise every
            # retry would leak one socket.
            if sock is not None:
                try:
                    sock.close()
                except socket.error:
                    pass  # best effort, the socket is unusable anyway
            if not reattempt:
                return False

            # quiesce if reconnection fails:
            # This algorithm based loosely on Twisted internet.protocol
            # http://twistedmatrix.com/trac/browser/trunk/twisted/internet/protocol.py#L310
            delay = min(delay * RECONNECT_QUIESCE_FACTOR, RECONNECT_MAX_DELAY)
            # A normal distribution is unbounded; never hand a negative value
            # to time.sleep().
            delay = max(0.0, random.normalvariate(delay, delay * RECONNECT_QUIESCE_JITTER))
            logging.debug('Waiting %fs until next reconnect attempt...', delay)
            time.sleep(delay)
        return False

    def connectUnix(self, filepath):
        """Connect to Unix file and create socket.

        Not implemented: this placeholder does nothing and returns None.
        """

    def startTLS(self):
        """Handshakes for TLS.

        Wraps the current socket with TLS, performs the handshake and
        rebuilds the file wrapper.  Returns True on success, False when SSL
        support is unavailable.  Restarting the stream afterwards is left to
        the caller.
        """
        if not self.ssl_support:
            logging.warning("Tried to enable TLS, but ssl module not found.")
            return False
        logging.info("Negotiating TLS")
        self.socket = ssl.wrap_socket(self.socket,
                                      ssl_version=ssl.PROTOCOL_TLSv1,
                                      do_handshake_on_connect=False,
                                      ca_certs=self.ca_certs)
        self.socket.do_handshake()
        if sys.version_info < (3, 0):
            from . filesocket import filesocket
            self.filesocket = filesocket(self.socket)
        else:
            self.filesocket = self.socket.makefile('rb', 0)
        logging.debug("TLS negotitation successful")
        return True

    def process(self, threaded=True):
        """Start the scheduler, the event handler threads and the send thread.

        With threaded=True the receive loop runs in its own daemon thread and
        this call returns; otherwise the receive loop runs in the calling
        thread until it ends.
        """
        self.scheduler.process(threaded=True)
        self.run = True
        for number in range(0, HANDLER_THREADS):
            self.__startThread('eventhandle%s' % number, self._eventRunner)
        self.__startThread('sendthread', self._sendThread)
        if threaded:
            self.__startThread('process', self._process)
        else:
            self._process()

    def __startThread(self, name, target):
        """Start a named daemon thread and remember it in self.__thread."""
        worker = threading.Thread(name=name, target=target)
        worker.daemon = True
        self.__thread[name] = worker
        worker.start()

    def schedule(self, name, seconds, callback, args=None, kwargs=None, repeat=False):
        """Register `callback` with the scheduler, reporting through the event queue."""
        self.scheduler.add(name, seconds, callback, args, kwargs, repeat,
                           qpointer=self.eventqueue)

    def _process(self):
        """Start processing the socket.

        Runs until self.run is false: waits for the 'connected' state, sends
        the stream header and parses incoming XML.  Parse errors and
        unexpected errors disconnect, reconnecting if should_reconnect is set.
        """
        logging.debug('Process thread starting...')
        # The name of the parser's error class differs between Python
        # versions; look it up defensively so that evaluating the except
        # clause below can never itself fail with AttributeError.
        parse_error = (getattr(cElementTree, 'XMLParserError', None)
                       or getattr(cElementTree, 'ParseError', expat.ExpatError))
        while self.run:
            if not self.state.ensure('connected', wait=2):
                continue
            try:
                self.sendPriorityRaw(self.stream_header)
                while self.run and self.__readXML():
                    pass
            except socket.timeout:
                logging.debug('socket rcv timeout')
            except RestartStream:
                logging.debug("Restarting stream...")
                continue  # DON'T re-initialize the stream -- this exception is sent
                # specifically when we've initialized TLS and need to re-send the <stream> header.
            except (KeyboardInterrupt, SystemExit):
                logging.debug("System interrupt detected")
                self.shutdown()
                self.eventqueue.put(('quit', None, None))
            except parse_error:
                # if there is an xml parsing exception, assume stream needs to be restarted
                logging.warning('XML RCV parsing error!', exc_info=1)
                self.disconnect(reconnect=self.should_reconnect)
            except Exception:
                logging.exception('Unexpected error in RCV thread')
                self.disconnect(reconnect=self.should_reconnect)
        logging.debug('Quitting Process thread')

    def __readXML(self):
        """Parse the incoming stream, dispatching each top-level stanza.

        The stream's root element is passed to start_stream_handler(); every
        completed depth-1 element is passed to __spawnEvent().  Always
        returns False, both when the root element closes and when the parser
        runs out of input.
        """
        # NOTE(review): the event names are byte strings as in the original
        # code; confirm they match what iterparse yields on the targeted
        # Python versions.
        depth = 0
        root = None
        for (event, xmlobj) in cElementTree.iterparse(self.filesocket, (b'end', b'start')):
            starting = event == b'start'
            if depth == 0 and starting:
                root = xmlobj
                logging.debug('handling start stream')
                self.start_stream_handler(root)
            if event == b'end':
                depth -= 1
                if depth == 0:
                    # what is this case exactly?  Premature EOF?
                    logging.debug("Ending readXML loop")
                    return False
                if depth == 1:
                    self.__spawnEvent(xmlobj)
                    # Drop handled children so the root does not grow forever.
                    if root is not None and len(root):
                        root.clear()
            if starting:
                depth += 1
        logging.debug("Exiting readXML loop")
        return False

    def _sendThread(self):
        """Write queued data to the socket until self.run is false."""
        logging.debug('send thread starting...')
        while self.run:
            if not self.state.ensure('connected', wait=2):
                continue
            data = None
            try:
                # The payload is always the last field of a queue entry.
                data = self.sendqueue.get(True, 5)[-1]
                logging.debug("SEND: %s", data)
                self.socket.sendall(data.encode('utf-8'))
            except queue.Empty:
                pass
            except socket.timeout:
                # this is to prevent a thread blocked indefinitely
                logging.debug('timeout sending packet data')
            except Exception:
                logging.warning("Failed to send %s", data)
                logging.exception("Socket error in SEND thread")
                # TODO it's somewhat unsafe for the sender thread to assume it can just
                # re-intitialize the connection, since the receiver thread could be doing
                # the same thing concurrently.  Oops!  The safer option would be to throw
                # some sort of event that could be handled by a common thread or the reader
                # thread to perform reconnect and then re-initialize the handler threads as well.
                if self.should_reconnect:
                    self.disconnect(reconnect=True)

    def __queueSend(self, priority, data):
        """Queue `data` so that equal priorities are sent in FIFO order.

        A bare (priority, data) tuple would make PriorityQueue order entries
        of the same priority by comparing the payload strings, reordering
        stanzas on the wire.  The sequence number breaks ties instead.
        """
        with self.__send_lock:
            sequence = self.__send_seq
            self.__send_seq = sequence + 1
        self.sendqueue.put((priority, sequence, data))

    def sendRaw(self, data):
        """Queue `data` (a text string) for sending at normal priority.  Returns True."""
        self.__queueSend(1, data)
        return True

    def sendPriorityRaw(self, data):
        """Queue `data` ahead of everything queued through sendRaw().  Returns True."""
        self.__queueSend(0, data)
        return True

    def disconnect(self, reconnect=False):
        """Send the stream footer, close the socket and optionally reconnect.

        Does nothing except log a warning when the stream is not connected.
        """
        with self.state.transition_ctx('connected', 'disconnected') as locked:
            if not locked:
                logging.warning("Already disconnected.")
                return
            logging.debug("Disconnecting...")
            self.sendRaw(self.stream_footer)
            # Give the send thread time to flush the footer.
            time.sleep(5)
            #send end of stream
            #wait for end of stream back
            try:
                self.socket.close()
            except socket.error as serr:
                # "as (errno, strerror)" tuple unpacking is a syntax error on
                # Python 3; read the attributes from the exception instead.
                logging.exception("Error while disconnecting. Socket Error #%s: %s",
                                  serr.errno, serr.strerror)
            try:
                self.filesocket.close()
            except socket.error:
                logging.exception("Error closing filesocket.")
        # Reconnect only after the transition context has been left.
        if reconnect:
            self.connect()

    def shutdown(self):
        """Disconnect and tell all event threads and the scheduler to stop."""
        self.disconnect()
        self.run = False
        self.scheduler.run = False

    def incoming_filter(self, xmlobj):
        """Hook applied to every received element; returns it unchanged by default."""
        return xmlobj

    def __spawnEvent(self, xmlobj):
        """Turn a received element into a stanza and queue it for its handlers.

        The first registered root stanza class whose qualified name matches
        the element tag wraps it; otherwise StanzaBase is used.  Every
        matching handler is queued; when none matches, stanza.unhandled()
        is called.
        """
        # Serialising the element is expensive; skip it unless it is logged.
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            logging.debug("RECV: %s", cElementTree.tostring(xmlobj))
        xmlobj = self.incoming_filter(xmlobj)
        stanza = None
        for stanza_class in self.__root_stanza:
            if xmlobj.tag == "{%s}%s" % (self.default_ns, stanza_class.name):
                stanza = stanza_class(self, xmlobj)
                break
        if stanza is None:
            stanza = StanzaBase(self, xmlobj)
        unhandled = True
        # TODO inefficient linear search; performance might be improved by hashtable lookup
        # Iterate over a snapshot: removing an expired handler from the list
        # being iterated would make the loop skip the handler after it.
        for handler in list(self.__handlers):
            if not handler.match(stanza):
                continue
            handler.prerun(stanza)
            self.eventqueue.put(('stanza', handler, stanza))
            if handler.checkDelete() and handler in self.__handlers:
                self.__handlers.remove(handler)
            unhandled = False
        if unhandled:
            stanza.unhandled()

    def _eventRunner(self):
        """Run queued events until self.run is false or a 'quit' event arrives.

        Events are tuples (etype, handler, *args) with etype 'stanza',
        'sched' or 'quit'.  Returns False when a 'quit' event is received.
        """
        logging.debug("Loading event runner")
        while self.run:
            try:
                event = self.eventqueue.get(True, timeout=5)
            except queue.Empty:
                continue
            if event is None:
                continue
            etype = event[0]
            handler = event[1]
            args = event[2:]
            if etype == 'stanza':
                try:
                    handler.run(args[0])
                except Exception as e:
                    logging.exception("Exception in event handler")
                    args[0].exception(e)
            elif etype == 'sched':
                try:
                    handler.run(*args)
                except Exception:
                    logging.error(traceback.format_exc())
            elif etype == 'quit':
                logging.debug("Quitting eventRunner thread")
                return False

    def registerHandler(self, handler, before=None, after=None):
        """Add handler with matcher class and parameters.

        `before` and `after` are accepted but currently ignored; handlers are
        always appended.
        """
        self.__handlers.append(handler)

    def removeHandler(self, name):
        """Remove the first registered handler whose name equals `name`, if any."""
        for position, handler in enumerate(self.__handlers):
            if handler.name == name:
                del self.__handlers[position]
                return

    def registerStanza(self, stanza_class):
        """Adds stanza.  If root stanzas build stanzas sent in events while non-root stanzas build substanza objects."""
        self.__root_stanza.append(stanza_class)

    def registerStanzaExtension(self, stanza_class, stanza_extension):
        """Record `stanza_extension` for `stanza_class` in the module registry."""
        stanza_extensions.setdefault(stanza_class, []).append(stanza_extension)

    def removeStanza(self, stanza_class, root=False):
        """Remove the stanza's registration.

        Raises ValueError (root) or KeyError (non-root) when the class was
        not registered.
        """
        if root:
            # Root stanzas live in a list, so remove by value rather than
            # using the class as an index.
            self.__root_stanza.remove(stanza_class)
        else:
            del self.__stanza[stanza_class]

    def removeStanzaExtension(self, stanza_class, stanza_extension):
        """Remove `stanza_extension` from the module registry entry of `stanza_class`."""
        stanza_extensions[stanza_class].remove(stanza_extension)

    def tostring(self, xml, xmlns='', stringbuffer=''):
        """Serialize the element `xml` (and its children) to a string.

        xml          -- ElementTree element to serialize.
        xmlns        -- namespace already in effect for the parent; an xmlns
                        declaration is emitted only when the element's
                        namespace differs and has no entry in namespace_map.
        stringbuffer -- text placed in front of the output.
        """
        output = [stringbuffer]
        #TODO respect ET mapped namespaces
        if '}' in xml.tag:
            namespace, tag = xml.tag.split('}', 1)
            namespace = namespace[1:]
        else:
            namespace, tag = '', xml.tag
        declaration = ''
        if namespace != '' and namespace != xmlns:
            if namespace in self.namespace_map:
                # Mapped namespaces use their prefix; an empty prefix means
                # neither a prefix nor a declaration is written.
                prefix = self.namespace_map[namespace]
                if prefix != '':
                    tag = "%s:%s" % (prefix, tag)
            else:
                declaration = ' xmlns="%s"' % namespace
        output.append("<%s" % tag)
        output.append(declaration)
        for attrib in xml.attrib:
            # Attribute values are delimited by double quotes, so a literal
            # quote must always be escaped, whatever escape_quotes says.
            value = self.xmlesc(xml.attrib[attrib]).replace('"', '&quot;')
            output.append(' %s="%s"' % (attrib, value))
        if len(xml) or xml.text or xml.tail:
            output.append(">")
            if xml.text:
                output.append(self.xmlesc(xml.text))
            # Iterate the element directly; Element.getchildren() no longer
            # exists on current Python versions.
            for child in xml:
                output.append(self.tostring(child, namespace))
            output.append("</%s>" % (tag, ))
            if xml.tail:
                output.append(self.xmlesc(xml.tail))
        else:
            output.append(" />")
        return ''.join(output)

    def xmlesc(self, text):
        """Return `text` with &, <, > and ' replaced by XML entities.

        Double quotes are replaced as well when self.escape_quotes is true.
        """
        replacements = {
            '&': '&amp;',
            '<': '&lt;',
            '>': '&gt;',
            "'": '&apos;',
        }
        if self.escape_quotes:
            replacements['"'] = '&quot;'
        return ''.join([replacements.get(char, char) for char in text])

    def start_stream_handler(self, xml):
        """Meant to be overridden"""
        logging.warning("No start stream handler has been implemented.")