From 169e85dbc13dcaae8a699618883e512614f540b7 Mon Sep 17 00:00:00 2001 From: Simon McVittie Date: Fri, 27 Apr 2018 11:09:07 +0100 Subject: [PATCH] avahi-python: Encode unicode strings as UTF-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, we would effectively encode anything representable in Latin-1 as Latin-1, and crash on anything not representable in Latin-1: >>> import avahi >>> avahi.string_to_byte_array(u'©') [dbus.Byte(169)] >>> avahi.string_to_byte_array(u'\ufeff') Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/usr/lib/python2.7/dist-packages/avahi/__init__.py", line 94, in string_to_byte_array r.append(dbus.Byte(ord(c))) ValueError: Integer outside range 0-255 This is particularly important for Python 3, where the str type is a Unicode string. The b'' syntax for bytestrings is supported since at least Python 2.7. These functions now accept either Unicode strings (Python 2 unicode, Python 3 str), which are encoded in UTF-8, or bytestrings (Python 2 str, Python 3 bytes) which are taken as-is. 
Signed-off-by: Simon McVittie --- avahi-python/avahi/__init__.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/avahi-python/avahi/__init__.py b/avahi-python/avahi/__init__.py index 7b450293..02305b02 100644 --- a/avahi-python/avahi/__init__.py +++ b/avahi-python/avahi/__init__.py @@ -17,6 +17,8 @@ # Some definitions matching those in avahi-common/defs.h +import sys + import dbus SERVER_INVALID, SERVER_REGISTERING, SERVER_RUNNING, SERVER_COLLISION, SERVER_FAILURE = range(0, 5) @@ -66,6 +68,9 @@ DBUS_INTERFACE_SERVICE_RESOLVER = DBUS_NAME + ".ServiceResolver" DBUS_INTERFACE_RECORD_BROWSER = DBUS_NAME + ".RecordBrowser" +if sys.version_info[0] >= 3: + unicode = str + def byte_array_to_string(s): r = "" @@ -86,12 +91,19 @@ def txt_array_to_string_array(t): return l - def string_to_byte_array(s): + if isinstance(s, unicode): + s = s.encode('utf-8') + r = [] for c in s: - r.append(dbus.Byte(ord(c))) + if isinstance(c, int): + # Python 3: iterating over bytes yields ints + r.append(dbus.Byte(c)) + else: + # Python 2: iterating over str yields str + r.append(dbus.Byte(ord(c))) return r @@ -107,6 +119,12 @@ def dict_to_txt_array(txt_dict): l = [] for k,v in txt_dict.items(): - l.append(string_to_byte_array("%s=%s" % (k,v))) + if isinstance(k, unicode): + k = k.encode('utf-8') + + if isinstance(v, unicode): + v = v.encode('utf-8') + + l.append(string_to_byte_array(b"%s=%s" % (k,v))) return l