Dmitry Shmidt | 8d520ff | 2011-05-09 14:06:53 -0700 | [diff] [blame] | 1 | /* |
| 2 | * UPnP XML helper routines |
| 3 | * Copyright (c) 2000-2003 Intel Corporation |
| 4 | * Copyright (c) 2006-2007 Sony Corporation |
| 5 | * Copyright (c) 2008-2009 Atheros Communications |
| 6 | * Copyright (c) 2009, Jouni Malinen <j@w1.fi> |
| 7 | * |
| 8 | * See wps_upnp.c for more details on licensing and code history. |
| 9 | */ |
| 10 | |
| 11 | #include "includes.h" |
| 12 | |
| 13 | #include "common.h" |
| 14 | #include "base64.h" |
| 15 | #include "http.h" |
| 16 | #include "upnp_xml.h" |
| 17 | |
| 18 | |
| 19 | /* |
| 20 | * XML parsing and formatting |
| 21 | * |
| 22 | * XML is a markup language based on unicode; usually (and in our case, |
| 23 | * always!) based on utf-8. utf-8 uses a variable number of bytes per |
| 24 | * character. utf-8 has the advantage that all non-ASCII unicode characters are |
| 25 | * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII |
| 26 | * characters are single ascii bytes, thus we can use typical text processing. |
| 27 | * |
| 28 | * (One other interesting thing about utf-8 is that it is possible to look at |
| 29 | * any random byte and determine if it is the first byte of a character as |
| 30 | * versus a continuation byte). |
| 31 | * |
| 32 | * The base syntax of XML uses a few ASCII punctuation characters; any |
| 33 | * characters that would appear in the payload data are rewritten using |
| 34 | * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<). |
| 35 | * Five such escapes total (more can be defined but that does not apply to our |
| 36 | * case). Thus we can safely parse for angle brackets etc. |
| 37 | * |
| 38 | * XML describes tree structures of tagged data, with each element beginning |
| 39 | * with an opening tag <label> and ending with a closing tag </label> with |
| 40 | * matching label. (There is also a self-closing tag <label/> which is supposed |
| 41 | * to be equivalent to <label></label>, i.e., no payload, but we are unlikely |
| 42 | * to see it for our purpose). |
| 43 | * |
| 44 | * Actually the opening tags are a little more complicated because they can |
| 45 | * contain "attributes" after the label (delimited by ascii space or tab chars) |
| 46 | * of the form attribute_label="value" or attribute_label='value'; as it turns |
| 47 | * out we do not have to read any of these attributes, just ignore them. |
| 48 | * |
| 49 | * Labels are any sequence of chars other than space, tab, right angle bracket |
| 50 | * (and ?), but may have an inner structure of <namespace><colon><plain_label>. |
| 51 | * As it turns out, we can ignore the namespaces, in fact we can ignore the |
| 52 | * entire tree hierarchy, because the plain labels we are looking for will be |
| 53 | * unique (not in general, but for this application). We do however have to be |
| 54 | * careful to skip over the namespaces. |
| 55 | * |
| 56 | * In generating XML we have to be more careful, but that is easy because |
| 57 | * everything we do is pretty canned. The only real care to take is to escape |
| 58 | * any special chars in our payload. |
| 59 | */ |
| 60 | |
/**
 * xml_next_tag - Advance to next tag
 * @in: Input (NUL-terminated text to scan)
 * @out: OUT: start of tag just after '<'
 * @out_tagname: OUT: start of name of tag, skipping namespace
 * @end: OUT: one after tag (just past the closing '>')
 * Returns: 0 on success, 1 on failure
 *
 * A tag has form:
 *     <left angle bracket><...><right angle bracket>
 * Within the angle brackets, there is an optional leading forward slash (which
 * makes the tag an ending tag), then an optional leading label (followed by
 * colon) and then the tag name itself.
 *
 * Note that angle brackets present in the original data must have been encoded
 * as &lt; and &gt; so they will not trouble us.
 *
 * Failure means that no '<' was found, or that a '<' was found without a
 * matching '>' before the end of the string. In the latter case @out and
 * @out_tagname have already been written; @end is only written on success.
 */
int xml_next_tag(const char *in, const char **out,
		 const char **out_tagname, const char **end)
{
	/* Skip payload text up to the next opening angle bracket */
	while (*in && *in != '<')
		in++;
	if (*in != '<')
		return 1;
	*out = ++in;

	/* A leading slash marks an ending tag; the name follows it */
	if (*in == '/')
		in++;
	*out_tagname = in; /* maybe */

	/*
	 * Skip a possible namespace prefix. The cast is required: the input is
	 * UTF-8, so bytes >= 0x80 may be negative as plain char, and passing a
	 * negative value (other than EOF) to isalnum() is undefined behavior.
	 */
	while (isalnum((unsigned char) *in) || *in == '-')
		in++;
	if (*in == ':')
		*out_tagname = ++in; /* what we skipped was a namespace */

	/* Ignore the rest of the tag (name, attributes) up to '>' */
	while (*in && *in != '>')
		in++;
	if (*in != '>')
		return 1;
	*end = ++in;
	return 0;
}
| 100 | |
| 101 | |
| 102 | /* xml_data_encode -- format data for xml file, escaping special characters. |
| 103 | * |
| 104 | * Note that we assume we are using utf8 both as input and as output! |
| 105 | * In utf8, characters may be classed as follows: |
| 106 | * 0xxxxxxx(2) -- 1 byte ascii char |
| 107 | * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80 |
| 108 | * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here) |
| 109 | * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here) |
| 110 | * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here) |
| 111 | * 10xxxxxx(2) -- extension byte (6 payload bits per byte) |
| 112 | * Some values implied by the above are however illegal because they |
| 113 | * do not represent unicode chars or are not the shortest encoding. |
| 114 | * Actually, we can almost entirely ignore the above and just do |
| 115 | * text processing same as for ascii text. |
| 116 | * |
| 117 | * XML is written with arbitrary unicode characters, except that five |
| 118 | * characters have special meaning and so must be escaped where they |
| 119 | * appear in payload data... which we do here. |
| 120 | */ |
| 121 | void xml_data_encode(struct wpabuf *buf, const char *data, int len) |
| 122 | { |
| 123 | int i; |
| 124 | for (i = 0; i < len; i++) { |
| 125 | u8 c = ((u8 *) data)[i]; |
| 126 | if (c == '<') { |
| 127 | wpabuf_put_str(buf, "<"); |
| 128 | continue; |
| 129 | } |
| 130 | if (c == '>') { |
| 131 | wpabuf_put_str(buf, ">"); |
| 132 | continue; |
| 133 | } |
| 134 | if (c == '&') { |
| 135 | wpabuf_put_str(buf, "&"); |
| 136 | continue; |
| 137 | } |
| 138 | if (c == '\'') { |
| 139 | wpabuf_put_str(buf, "'"); |
| 140 | continue; |
| 141 | } |
| 142 | if (c == '"') { |
| 143 | wpabuf_put_str(buf, """); |
| 144 | continue; |
| 145 | } |
| 146 | /* |
| 147 | * We could try to represent control characters using the |
| 148 | * sequence: &#x; where x is replaced by a hex numeral, but not |
| 149 | * clear why we would do this. |
| 150 | */ |
| 151 | wpabuf_put_u8(buf, c); |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | |
| 156 | /* xml_add_tagged_data -- format tagged data as a new xml line. |
| 157 | * |
| 158 | * tag must not have any special chars. |
| 159 | * data may have special chars, which are escaped. |
| 160 | */ |
| 161 | void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data) |
| 162 | { |
| 163 | wpabuf_printf(buf, "<%s>", tag); |
| 164 | xml_data_encode(buf, data, os_strlen(data)); |
| 165 | wpabuf_printf(buf, "</%s>\n", tag); |
| 166 | } |
| 167 | |
| 168 | |
/* A POST body looks something like (per upnp spec):
 * <?xml version="1.0"?>
 * <s:Envelope
 *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 *   <s:Body>
 *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 *       <argumentName>in arg value</argumentName>
 *       other in args and their values go here, if any
 *     </u:actionName>
 *   </s:Body>
 * </s:Envelope>
 *
 * where :
 *      s: might be some other namespace name followed by colon
 *      u: might be some other namespace name followed by colon
 *      actionName will be replaced according to action requested
 *      schema following actionName will be WFA scheme instead
 *      argumentName will be actual argument name
 *      (in arg value) will be actual argument value
 */

/**
 * xml_get_first_item - Get the payload of the first element with a given name
 * @doc: NUL-terminated XML document text
 * @item: Element name to look for (without namespace prefix)
 * Returns: Newly allocated, NUL-terminated copy of the text between the end of
 * the matching opening tag and the next '<' (or end of input), or %NULL if no
 * matching opening tag is found or the allocation fails
 *
 * The name comparison ignores case and any namespace prefix of the tag. The
 * returned text is copied verbatim (no unescaping is done here) and is
 * allocated with os_zalloc(), so the caller is expected to release it with
 * os_free().
 */
char * xml_get_first_item(const char *doc, const char *item)
{
	const char *match = item;
	size_t match_len = os_strlen(item);
	const char *tag, *tagname, *end;
	char *value;

	/*
	 * This is crude: ignore any possible tag name conflicts and go right
	 * to the first tag of this name. This should be ok for the limited
	 * domain of UPnP messages.
	 */
	for (;;) {
		if (xml_next_tag(doc, &tag, &tagname, &end))
			return NULL;
		doc = end;
		/*
		 * Accept only an opening tag whose name is exactly the
		 * requested one, i.e., the name is followed by '>' or by a
		 * non-printing delimiter (such as a space before attributes).
		 * The character is cast to unsigned char because the document
		 * may contain bytes >= 0x80 and passing a negative value to
		 * isgraph() is undefined behavior.
		 */
		if (!os_strncasecmp(tagname, match, match_len) &&
		    *tag != '/' &&
		    (tagname[match_len] == '>' ||
		     !isgraph((unsigned char) tagname[match_len]))) {
			break;
		}
	}

	/* The payload runs from just after the tag up to the next '<' */
	end = doc;
	while (*end && *end != '<')
		end++;

	/* Zeroed allocation with one extra byte provides the terminator */
	value = os_zalloc(1 + (end - doc));
	if (value == NULL)
		return NULL;
	os_memcpy(value, doc, end - doc);
	return value;
}
| 222 | |
| 223 | |
| 224 | struct wpabuf * xml_get_base64_item(const char *data, const char *name, |
| 225 | enum http_reply_code *ret) |
| 226 | { |
| 227 | char *msg; |
| 228 | struct wpabuf *buf; |
| 229 | unsigned char *decoded; |
| 230 | size_t len; |
| 231 | |
| 232 | msg = xml_get_first_item(data, name); |
| 233 | if (msg == NULL) { |
| 234 | *ret = UPNP_ARG_VALUE_INVALID; |
| 235 | return NULL; |
| 236 | } |
| 237 | |
Ahmed ElArabawy | 0ff61c5 | 2019-12-26 12:38:39 -0800 | [diff] [blame^] | 238 | decoded = base64_decode(msg, os_strlen(msg), &len); |
Dmitry Shmidt | 8d520ff | 2011-05-09 14:06:53 -0700 | [diff] [blame] | 239 | os_free(msg); |
| 240 | if (decoded == NULL) { |
| 241 | *ret = UPNP_OUT_OF_MEMORY; |
| 242 | return NULL; |
| 243 | } |
| 244 | |
| 245 | buf = wpabuf_alloc_ext_data(decoded, len); |
| 246 | if (buf == NULL) { |
| 247 | os_free(decoded); |
| 248 | *ret = UPNP_OUT_OF_MEMORY; |
| 249 | return NULL; |
| 250 | } |
| 251 | return buf; |
| 252 | } |