[Python-checkins] CVS: python/dist/src/Modules binascii.c,2.29,2.30

Sun, 30 Sep 2001 13:32:13 -0700

Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv10858/Modules

Modified Files:
	binascii.c 
Log Message:
Patch #462190, patch #464070: Support quoted printable in the binascii module.
Decode and encode underscores for header style encoding. Fixes bug #463996.


Index: binascii.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/binascii.c,v
retrieving revision 2.29
retrieving revision 2.30
diff -C2 -d -r2.29 -r2.30
*** binascii.c	2001/06/07 05:51:36	2.29
--- binascii.c	2001/09/30 20:32:11	2.30
***************
*** 43,46 ****
--- 43,55 ----
  **
  ** Jack Jansen, CWI, July 1995.
+ ** 
+ ** Added support for quoted-printable encoding, based on RFC 1521 et al.
+ ** Quoted-printable encoding specifies that non-printable characters (anything
+ ** below 32 or above 126) be encoded as =XX, where XX is the hexadecimal value
+ ** of the character.  It also specifies some other behavior to enable 8-bit data
+ ** in a mail message with little difficulty (maximum line sizes, protecting
+ ** some cases of whitespace, etc.).
+ **
+ ** Brandon Long, September 2001.
  */
  
***************
*** 972,975 ****
--- 981,1267 ----
  This function is also available as \"unhexlify()\"";
  
+ /* Value of each 7-bit ASCII character when read as a hexadecimal digit:
+  * '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15, and every other
+  * character maps to -1.  The table is read-only. */
+ static const int table_hex[128] = {
+   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
+   -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+   -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+   -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
+ };
+ 
+ /* Look up the hex-digit value of c.  The table only has 128 entries and the
+  * macro does no range check, so the caller must guarantee 0 <= c <= 127.
+  * The expansion is fully parenthesized so it composes safely with any
+  * surrounding operators. */
+ #define hexval(c) (table_hex[(unsigned int)(c)])
+ 
+ /* Line-length limit checked by b2a_qp when deciding where to insert a
+  * soft line break. */
+ #define MAXLINESIZE 76
+ 
+ static char doc_a2b_qp[] = "Decode a string of qp-encoded data";
+ 
+ /* a2b_qp(data, header=0): decode quoted-printable data.
+  *
+  * Decoding rules, applied left to right:
+  *   - "=XX" with two hex digits becomes the byte with that value;
+  *   - "=" followed by '\n', '\r', space or tab is a soft line break: it is
+  *     dropped together with everything up to and including the next '\n';
+  *   - "==" yields a single '=' (tolerates output of an older encoder);
+  *   - any other "=" is copied through literally;
+  *   - a lone '=' as the very last input byte is dropped;
+  *   - when header is non-zero, '_' decodes to a space.
+  * All other bytes are copied unchanged.
+  *
+  * Returns a new string object, or NULL on failure.
+  */
+ static PyObject* 
+ binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
+ {
+ 	unsigned int in, out;
+ 	unsigned char ch;	/* unsigned: holds decoded values up to 0xFF */
+ 	unsigned char *data, *odata;
+ 	unsigned int datalen = 0;
+ 	PyObject *rv;
+ 	static char *kwlist[] = {"data", "header", NULL};
+ 	int header = 0;
+ 
+ 	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data, 
+ 	      &datalen, &header))
+ 		return NULL;
+ 
+ 	/* Every input byte produces at most one output byte, so datalen bytes
+ 	 * are always enough.  Ask for at least one byte: calloc(1, 0) is
+ 	 * allowed to return NULL, which would otherwise be reported as an
+ 	 * out-of-memory error for empty input. */
+ 	odata = (unsigned char *) calloc(1, datalen ? datalen : 1);
+ 
+ 	if (odata == NULL) {
+ 		PyErr_NoMemory();
+ 		return NULL;
+ 	}
+ 
+ 	in = out = 0;
+ 	while (in < datalen) {
+ 		if (data[in] == '=') {
+ 			in++;
+ 			if (in >= datalen) break;
+ 			/* Soft line breaks */
+ 			if ((data[in] == '\n') || (data[in] == '\r') || 
+ 			    (data[in] == ' ') || (data[in] == '\t')) {
+ 				if (data[in] != '\n') {
+ 					while (in < datalen && data[in] != '\n') in++;
+ 				}
+ 				if (in < datalen) in++;
+ 			}
+ 			else if (data[in] == '=') {
+ 				/* broken case from broken python qp */
+ 				odata[out++] = '=';
+ 				in++;
+ 			}
+ 			/* Only look at data[in+1] when it is inside the buffer;
+ 			 * the range tests also keep hexval() within its
+ 			 * 128-entry table. */
+ 			else if ((in + 1 < datalen) &&
+ 			         ((data[in] >= 'A' && data[in] <= 'F') || 
+ 			          (data[in] >= 'a' && data[in] <= 'f') ||
+ 			          (data[in] >= '0' && data[in] <= '9')) &&
+ 			         ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
+ 			          (data[in+1] >= 'a' && data[in+1] <= 'f') ||
+ 			          (data[in+1] >= '0' && data[in+1] <= '9'))) {
+ 				/* hexval */
+ 				ch = (unsigned char) (hexval(data[in]) << 4);
+ 				in++;
+ 				ch |= (unsigned char) hexval(data[in]);
+ 				in++;
+ 				odata[out++] = ch;
+ 			}
+ 			else {
+ 				/* Not a valid escape: emit the '=' itself and let
+ 				 * the next iteration handle the following byte. */
+ 				odata[out++] = '=';
+ 			}
+ 		}
+ 		else if (header && data[in] == '_') {
+ 			odata[out++] = ' ';
+ 			in++;
+ 		}
+ 		else {
+ 			odata[out] = data[in];
+ 			in++;
+ 			out++;
+ 		}
+ 	}
+ 	/* The scratch buffer is released whether or not the string object
+ 	 * could be created. */
+ 	rv = PyString_FromStringAndSize((char *) odata, out);
+ 	free (odata);
+ 	return rv;
+ }
+ 
+ /* Write the two upper-case hexadecimal digits of ch into s[0] (high nibble)
+  * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
+ static int 
+ to_hex (unsigned char ch, unsigned char *s)
+ {
+ 	static const char digits[] = "0123456789ABCDEF";
+ 
+ 	s[0] = digits[(ch >> 4) & 0x0F];
+ 	s[1] = digits[ch & 0x0F];
+ 	return 0;
+ }
+ 
+ static char doc_b2a_qp[] = 
+ "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
+  Encode a string using quoted-printable encoding. \n\
+ \n\
+ On encoding, when istext is set, newlines are not encoded, and white \n\
+ space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
+ both encoded.  When quotetabs is set, space and tabs are encoded.";
+ 
+ /* Return non-zero when data[in] has to be written as an "=XX" escape.
+  *
+  * Both passes of binascii_b2a_qp (sizing and encoding) call this, so the
+  * two can never disagree about which bytes are escaped.
+  */
+ static int
+ qp_must_escape (const unsigned char *data, unsigned int in,
+                 unsigned int datalen, unsigned int linelen,
+                 int istext, int quotetabs, int header)
+ {
+ 	unsigned char c = data[in];
+ 
+ 	if ((c > 126) || (c == '='))
+ 		return 1;
+ 	/* In header mode '_' stands for a space, so a real '_' is escaped */
+ 	if (header && (c == '_'))
+ 		return 1;
+ 	/* A '.' written when exactly one character is already on the line */
+ 	if ((c == '.') && (linelen == 1))
+ 		return 1;
+ 	/* Binary mode: line ends are data, not line structure */
+ 	if (!istext && ((c == '\r') || (c == '\n')))
+ 		return 1;
+ 	/* Whitespace as the very last byte of the input */
+ 	if (((c == '\t') || (c == ' ')) && (in + 1 == datalen))
+ 		return 1;
+ 	/* Control characters are always escaped; space and tab only when
+ 	 * quotetabs is set.  CR and LF are handled by the caller. */
+ 	if ((c < 33) && (c != '\r') && (c != '\n') &&
+ 	    (quotetabs || ((c != '\t') && (c != ' '))))
+ 		return 1;
+ 	return 0;
+ }
+ 
+ /* XXX: This is ridiculously complicated to be backward compatible
+  * (mostly) with the quopri module.  It doesn't re-create the quopri
+  * module bug where text ending in CRLF has the CR encoded */
+ /* b2a_qp(data, quotetabs=0, istext=1, header=0): quoted-printable encode.
+  *
+  * Works in two passes over the input: the first computes an upper bound
+  * on the output size, the second writes the encoded bytes.  Both passes
+  * track the current line length identically so that soft line breaks
+  * ("=" followed by a line end) land in the same places.
+  *
+  * Returns a new string object, or NULL on failure.
+  */
+ static PyObject* 
+ binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
+ {
+ 	unsigned int in, out;
+ 	unsigned char *data, *odata;
+ 	unsigned int datalen = 0, odatalen = 0;
+ 	PyObject *rv;
+ 	unsigned int linelen = 0;
+ 	static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
+ 	int istext = 1;
+ 	int quotetabs = 0;
+ 	int header = 0;
+ 	unsigned char ch;
+ 	int crlf = 0;
+ 	unsigned char *p;
+ 
+ 	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data, 
+ 	      &datalen, &quotetabs, &istext, &header))
+ 		return NULL;
+ 
+ 	/* See if this string is using CRLF line ends */
+ 	/* XXX: this function has the side effect of converting all of
+ 	 * the end of lines to be the same depending on this detection
+ 	 * here */
+ 	/* The input is length-counted and may contain NUL bytes, so search
+ 	 * with memchr over datalen bytes rather than with strchr. */
+ 	p = (unsigned char *) memchr(data, '\n', datalen);
+ 	if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
+ 		crlf = 1;
+ 
+ 	/* First, scan to see how many characters need to be encoded */
+ 	in = 0;
+ 	while (in < datalen) {
+ 		if (qp_must_escape(data, in, datalen, linelen,
+ 		                   istext, quotetabs, header))
+ 		{
+ 			/* Soft line break before the escape if it won't fit */
+ 			if ((linelen + 3) >= MAXLINESIZE) {
+ 				linelen = 0;
+ 				if (crlf)
+ 					odatalen += 3;
+ 				else
+ 					odatalen += 2;
+ 			}
+ 			linelen += 3;
+ 			odatalen += 3;
+ 			in++;
+ 		}
+ 		else {
+ 		  	if (istext && 
+ 			    ((data[in] == '\n') ||
+ 			     ((in+1 < datalen) && (data[in] == '\r') &&
+ 			     (data[in+1] == '\n'))))
+ 			{
+ 			  	linelen = 0;
+ 				/* Protect against whitespace on end of line */
+ 				if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
+ 					odatalen += 2;
+ 				if (crlf)
+ 					odatalen += 2;
+ 				else
+ 					odatalen += 1;
+ 				if (data[in] == '\r')
+ 					in += 2;
+ 				else
+ 					in++;
+ 			}
+ 			else {
+ 				if ((in + 1 != datalen) && 
+ 				    (data[in+1] != '\n') &&
+ 				    (linelen + 1) >= MAXLINESIZE) {
+ 					linelen = 0;
+ 					if (crlf)
+ 						odatalen += 3;
+ 					else
+ 						odatalen += 2;
+ 				}
+ 				linelen++;
+ 				odatalen++;
+ 				in++;
+ 			}
+ 		}
+ 	}
+ 
+ 	/* Ask for at least one byte: calloc(1, 0) is allowed to return NULL,
+ 	 * which would otherwise be reported as an out-of-memory error for
+ 	 * empty input. */
+ 	odata = (unsigned char *) calloc(1, odatalen ? odatalen : 1);
+ 
+ 	if (odata == NULL) {
+ 		PyErr_NoMemory();
+ 		return NULL;
+ 	}
+ 
+ 	/* Second pass: same walk as above, this time writing the output */
+ 	in = out = linelen = 0;
+ 	while (in < datalen) {
+ 		if (qp_must_escape(data, in, datalen, linelen,
+ 		                   istext, quotetabs, header))
+ 		{
+ 			if ((linelen + 3 )>= MAXLINESIZE) {
+ 				odata[out++] = '=';
+ 				if (crlf) odata[out++] = '\r';
+ 				odata[out++] = '\n';
+ 				linelen = 0;
+ 			}
+ 			odata[out++] = '=';
+ 			to_hex(data[in], &odata[out]);
+ 			out += 2;
+ 			in++;
+ 			linelen += 3;
+ 		}
+ 		else {
+ 		  	if (istext && 
+ 			    ((data[in] == '\n') ||
+ 			     ((in+1 < datalen) && (data[in] == '\r') &&
+ 			     (data[in+1] == '\n'))))
+ 			{
+ 			  	linelen = 0;
+ 				/* Protect against whitespace on end of line:
+ 				 * rewrite the already-emitted space or tab as
+ 				 * its =XX escape. */
+ 				if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
+ 					ch = odata[out-1];
+ 					odata[out-1] = '=';
+ 					to_hex(ch, &odata[out]);
+ 					out += 2;
+ 				}
+ 					
+ 				if (crlf) odata[out++] = '\r';
+ 				odata[out++] = '\n';
+ 				if (data[in] == '\r')
+ 					in += 2;
+ 				else
+ 					in++;
+ 			}
+ 			else {
+ 				if ((in + 1 != datalen) && 
+ 				    (data[in+1] != '\n') &&
+ 				    (linelen + 1) >= MAXLINESIZE) {
+ 					odata[out++] = '=';
+ 					if (crlf) odata[out++] = '\r';
+ 					odata[out++] = '\n';
+ 					linelen = 0;
+ 				}
+ 				linelen++;
+ 				if (header && data[in] == ' ') {
+ 					odata[out++] = '_';
+ 					in++;
+ 				}
+ 				else {
+ 					odata[out++] = data[in++];
+ 				}
+ 			}
+ 		}
+ 	}
+ 	/* The scratch buffer is released whether or not the string object
+ 	 * could be created. */
+ 	rv = PyString_FromStringAndSize((char *) odata, out);
+ 	free (odata);
+ 	return rv;
+ }
  
  /* List of functions defined in the module */
***************
*** 991,994 ****
--- 1283,1290 ----
  	{"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
  	{"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
+ 	{"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS, 
+ 	  doc_a2b_qp},
+ 	{"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS, 
+           doc_b2a_qp},
  	{NULL, NULL}			     /* sentinel */
  };