Package cherrypy :: Package lib :: Module encoding
[hide private]
[frames | no frames]

Source Code for Module cherrypy.lib.encoding

  1  import struct 
  2  import time 
  3   
  4  import cherrypy 
  5  from cherrypy._cpcompat import basestring, BytesIO, ntob, set, unicodestr 
  6  from cherrypy.lib import file_generator 
  7  from cherrypy.lib import set_vary_header 
  8   
  9   
def decode(encoding=None, default_encoding='utf-8'):
    """Set or extend the charsets tried when decoding the request entity.

    Both arguments accept either one charset name or a list of names.

    encoding
        When not None, the request body's ``attempt_charsets`` is replaced
        outright by the given charset(s), regardless of any charset named
        in the Content-Type request header.

    default_encoding
        Consulted only when 'encoding' is None. When truthy, the given
        charset(s) are appended to the request body's existing
        ``attempt_charsets``.

    """
    entity = cherrypy.request.body

    if encoding is None:
        # No forced charset: optionally extend the existing candidates.
        if not default_encoding:
            return
        if isinstance(default_encoding, list):
            extra = default_encoding
        else:
            extra = [default_encoding]
        entity.attempt_charsets = entity.attempt_charsets + extra
        return

    # A forced charset (or list of them) replaces the candidates entirely.
    if isinstance(encoding, list):
        forced = encoding
    else:
        forced = [encoding]
    entity.attempt_charsets = forced
35 36
class ResponseEncoder:
    """Request-handler wrapper that encodes the response body to a charset.

    On construction the instance installs itself as ``request.handler``
    (keeping the previous handler in ``oldhandler``). When called, it runs
    the previous handler, normalizes the result to an iterable body and,
    if the response Content-Type qualifies, encodes unicode chunks using a
    charset chosen against the request's Accept-Charset header.
    """

    # Charset tried when the client states no preference or accepts "*".
    default_encoding = 'utf-8'
    # Message template for the 500 error raised when no Accept-Charset
    # header was sent and the default encoding fails.
    failmsg = "Response body could not be encoded with %r."
    # If not None, the only charset that will be attempted.
    encoding = None
    # Error-handling scheme passed to each chunk's .encode() call.
    errors = 'strict'
    # If True, only bodies whose Content-Type starts with "text/" are encoded.
    text_only = True
    # If True, write the chosen charset back into the Content-Type header.
    add_charset = True
    # If True, log each decision under the 'TOOLS.ENCODE' context.
    debug = False

    def __init__(self, **kwargs):
        """Apply keyword overrides and install self as the request handler.

        Every keyword argument is set as an instance attribute, so any of
        the class-level settings can be overridden per instance. If the
        current request has a handler, it is saved as ``self.oldhandler``
        and replaced with this instance.
        """
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.attempted_charsets = set()
        request = cherrypy.serving.request
        if request.handler is not None:
            # Replace request.handler with self
            if self.debug:
                cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE')
            self.oldhandler = request.handler
            request.handler = self

    def encode_stream(self, encoding):
        """Encode a streaming response body.

        Use a generator wrapper, and just pray it works as the stream is
        being written out.

        Returns False if 'encoding' was already attempted, True otherwise.
        Encoding errors are not detected here: they can only surface later,
        while the wrapped body is being iterated.
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        def encoder(body):
            for chunk in body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                yield chunk
        self.body = encoder(self.body)
        return True

    def encode_string(self, encoding):
        """Encode a buffered response body.

        Returns True and replaces ``self.body`` with a list of encoded
        chunks on success. Returns False if 'encoding' was already
        attempted, is unknown (LookupError), or cannot represent the body
        (UnicodeError).
        """
        if encoding in self.attempted_charsets:
            return False
        self.attempted_charsets.add(encoding)

        try:
            # Buffer the body before encoding. It may be a one-shot
            # iterator (a generator or file_generator); without this, a
            # failed attempt would consume chunks and the next charset
            # tried would silently encode only the remainder.
            if not isinstance(self.body, list):
                self.body = list(self.body)

            body = []
            for chunk in self.body:
                if isinstance(chunk, unicodestr):
                    chunk = chunk.encode(encoding, self.errors)
                body.append(chunk)
            self.body = body
        except (LookupError, UnicodeError):
            return False
        else:
            return True

    def find_acceptable_charset(self):
        """Encode ``self.body`` and return the name of the charset used.

        Candidates are taken from ``self.encoding`` (if set) or from the
        request's Accept-Charset header, falling back to
        ``self.default_encoding`` and ISO-8859-1 as described inline.

        Raises cherrypy.HTTPError 500 if no Accept-Charset header was sent
        and the default encoding fails, and 406 if no candidate succeeds.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        if self.debug:
            cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE')
        if response.stream:
            encoder = self.encode_stream
        else:
            encoder = self.encode_string
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                # Encoded strings may be of different lengths from their
                # unicode equivalents, and even from each other. For example:
                # >>> t = u"\u7007\u3040"
                # >>> len(t)
                # 2
                # >>> len(t.encode("UTF-8"))
                # 6
                # >>> len(t.encode("utf7"))
                # 8
                del response.headers["Content-Length"]

        # Parse the Accept-Charset request header, and try to provide one
        # of the requested charsets (in order of user preference).
        encs = request.headers.elements('Accept-Charset')
        charsets = [enc.value.lower() for enc in encs]
        if self.debug:
            cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE')

        if self.encoding is not None:
            # If specified, force this encoding to be used, or fail.
            encoding = self.encoding.lower()
            if self.debug:
                cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE')
            if (not charsets) or "*" in charsets or encoding in charsets:
                if self.debug:
                    cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENCODE')
                if encoder(encoding):
                    return encoding
        else:
            if not encs:
                if self.debug:
                    cherrypy.log('Attempting default encoding %r' %
                                 self.default_encoding, 'TOOLS.ENCODE')
                # Any character-set is acceptable.
                if encoder(self.default_encoding):
                    return self.default_encoding
                else:
                    raise cherrypy.HTTPError(500, self.failmsg % self.default_encoding)
            else:
                for element in encs:
                    if element.qvalue > 0:
                        if element.value == "*":
                            # Matches any charset. Try our default.
                            if self.debug:
                                cherrypy.log('Attempting default encoding due '
                                             'to %r' % element, 'TOOLS.ENCODE')
                            if encoder(self.default_encoding):
                                return self.default_encoding
                        else:
                            encoding = element.value
                            if self.debug:
                                cherrypy.log('Attempting encoding %s (qvalue >'
                                             '0)' % element, 'TOOLS.ENCODE')
                            if encoder(encoding):
                                return encoding

                if "*" not in charsets:
                    # If no "*" is present in an Accept-Charset field, then all
                    # character sets not explicitly mentioned get a quality
                    # value of 0, except for ISO-8859-1, which gets a quality
                    # value of 1 if not explicitly mentioned.
                    iso = 'iso-8859-1'
                    if iso not in charsets:
                        if self.debug:
                            cherrypy.log('Attempting ISO-8859-1 encoding',
                                         'TOOLS.ENCODE')
                        if encoder(iso):
                            return iso

        # No suitable encoding found.
        ac = request.headers.get('Accept-Charset')
        if ac is None:
            msg = "Your client did not send an Accept-Charset header."
        else:
            msg = "Your client sent this Accept-Charset header: %s." % ac
        msg += " We tried these charsets: %s." % ", ".join(self.attempted_charsets)
        raise cherrypy.HTTPError(406, msg)

    def __call__(self, *args, **kwargs):
        """Run the wrapped handler and return its (possibly encoded) body.

        The handler's return value is normalized to an iterable: a
        non-empty string becomes a one-item list, an empty string or None
        becomes [], and an object with a 'read' attribute is wrapped by
        file_generator. If the response has a Content-Type header and it
        qualifies (see ``text_only``), the body is encoded and the chosen
        charset is stored on the parsed header element; the header itself
        is rewritten only when ``add_charset`` is true.
        """
        response = cherrypy.serving.response
        self.body = self.oldhandler(*args, **kwargs)

        if isinstance(self.body, basestring):
            # strings get wrapped in a list because iterating over a single
            # item list is much faster than iterating over every character
            # in a long string.
            if self.body:
                self.body = [self.body]
            else:
                # [''] doesn't evaluate to False, so replace it with [].
                self.body = []
        elif hasattr(self.body, 'read'):
            self.body = file_generator(self.body)
        elif self.body is None:
            self.body = []

        ct = response.headers.elements("Content-Type")
        if self.debug:
            cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCODE')
        if ct:
            ct = ct[0]
            if self.text_only:
                if ct.value.lower().startswith("text/"):
                    if self.debug:
                        cherrypy.log('Content-Type %s starts with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = True
                else:
                    if self.debug:
                        cherrypy.log('Not finding because Content-Type %s does '
                                     'not start with "text/"' % ct,
                                     'TOOLS.ENCODE')
                    do_find = False
            else:
                if self.debug:
                    cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE')
                do_find = True

            if do_find:
                # Set "charset=..." param on response Content-Type header
                ct.params['charset'] = self.find_acceptable_charset()
                if self.add_charset:
                    if self.debug:
                        cherrypy.log('Setting Content-Type %s' % ct,
                                     'TOOLS.ENCODE')
                    response.headers["Content-Type"] = str(ct)

        return self.body
236 237 # GZIP 238
def compress(body, compress_level):
    """Yield 'body' as a gzip stream deflated at the given compress_level.

    'body' is an iterable of byte chunks. The generator emits the gzip
    member header, then the deflated data for each chunk, then the
    trailer (CRC32 and input size, each truncated to 32 bits).
    """
    import zlib

    mask32 = 0xFFFFFFFF

    # Header layout follows http://www.gzip.org/zlib/rfc-gzip.html
    # ID1 and ID2: gzip marker
    yield ntob('\x1f\x8b')
    # CM: compression method
    yield ntob('\x08')
    # FLG: none set
    yield ntob('\x00')
    # MTIME: 4 bytes
    yield struct.pack("<L", int(time.time()) & mask32)
    # XFL: max compression, slowest algo
    yield ntob('\x02')
    # OS: unknown
    yield ntob('\xff')

    checksum = zlib.crc32(ntob(""))
    total = 0
    # Negative window bits: raw deflate data, since this generator writes
    # the gzip header and trailer itself.
    deflater = zlib.compressobj(compress_level, zlib.DEFLATED,
                                -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
    for chunk in body:
        total += len(chunk)
        checksum = zlib.crc32(chunk, checksum)
        yield deflater.compress(chunk)
    yield deflater.flush()

    # CRC32: 4 bytes
    yield struct.pack("<L", checksum & mask32)
    # ISIZE: 4 bytes
    yield struct.pack("<L", total & mask32)
267
def decompress(body):
    """Return the decompressed contents of the gzip-encoded bytes 'body'.

    Any error raised by the gzip reader while parsing 'body' (for example
    on data that is not a gzip stream) propagates to the caller.
    """
    # Imported locally: the module-level name 'gzip' in this file is the
    # gzip() tool function, not the standard-library module.
    import gzip

    zbuf = BytesIO()
    zbuf.write(body)
    zbuf.seek(0)
    zfile = gzip.GzipFile(mode='rb', fileobj=zbuf)
    try:
        data = zfile.read()
    finally:
        # Close the reader even when read() fails on corrupt input.
        zfile.close()
    return data
278 279
def _mime_type_matches(ct, mime_types):
    """Return True if content type 'ct' matches an entry of 'mime_types'.

    An entry matches when it equals 'ct' exactly, when it is 'type/*' with
    the same type, or when it is 'type/*+suffix' with the same type and
    'ct' has a subtype ending in '+suffix'. Parameters are not supported.
    """
    if ct in mime_types:
        return True
    if '/' not in ct:
        return False

    # Bounded splits: a value with extra '/' or '+' characters must simply
    # fail to match rather than raise ValueError on unpacking.
    ct_media_type, ct_sub_type = ct.split('/', 1)
    for mime_type in mime_types:
        if '/' not in mime_type:
            continue
        media_type, sub_type = mime_type.split('/', 1)
        if ct_media_type != media_type:
            continue
        if sub_type == '*':
            return True
        if '+' in sub_type and '+' in ct_sub_type:
            # The suffix is whatever follows the LAST '+' of the subtype,
            # e.g. 'xml' for 'vnd.example+extra+xml'.
            ct_right = ct_sub_type.rsplit('+', 1)[1]
            left, right = sub_type.split('+', 1)
            if left == '*' and ct_right == right:
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    The provided list of mime-types must be of one of the following form:
        * type/subtype
        * type/*
        * type/*+subtype

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    When the Accept-Encoding header is present but the loop below finds
    neither an acceptable 'identity' nor a 'gzip'/'x-gzip' entry, a 406
    response is set.

    compress_level is passed through to compress(). If debug is true, each
    decision is logged. The default mime_types list is only read here,
    never mutated.
    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, "Accept-Encoding")

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, "cached", False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    # Keep only the media type; strip the optional whitespace that may
    # precede the ';' separating it from its parameters.
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # The provided mime-types may contain tokens such as 'text/*'
            # or 'application/*+xml'; the helper covers only those common
            # patterns and does not support extra parameters.
            if not _mime_type_matches(ct, mime_types):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]

            return

    if debug:
        cherrypy.log('No acceptable encoding found.', context='GZIP')
    cherrypy.HTTPError(406, "identity, gzip").set_response()
388