string.find returns -1 when string evidently in source file

bryan rasmussen rasmussen.bryan at gmail.com
Sun Feb 18 05:04:27 EST 2007


hi,

The following script prints -1, even though the string "LAYOUT" is evidently in the file:

import string
import sys

x = open("latvian.txt",'r')

x1 = x.read()

print x1.find("LAYOUT")

---

The input file, latvian.txt, looks like this:

KBD	Layout01	"Latvian (QWERTY) (Custom)"

COPYRIGHT	"(c) 2007 IG"

COMPANY	"IG"

LOCALEID	"00000426"

VERSION	1.0

SHIFTSTATE

0	//Column 4
1	//Column 5 : Shft
2	//Column 6 :       Ctrl
6	//Column 7 :       Ctrl Alt
7	//Column 8 : Shft  Ctrl Alt

LAYOUT		;an extra '@' at the end is a dead key

//SC	VK_		Cap	0	1	2	6	7
//--	----		----	----	----	----	----	----

02	1		0	1	0021	-1	00a0	-1		// DIGIT ONE, EXCLAMATION MARK, <none>,
NO-BREAK SPACE, <none>
03	2		0	2	0040	-1	00ab	-1		// DIGIT TWO, COMMERCIAL AT, <none>,
LEFT-POINTING DOUBLE ANGLE QUOTATION MARK *, <none>
04	3		0	3	0023	-1	00bb	-1		// DIGIT THREE, NUMBER SIGN, <none>,
RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK *, <none>
05	4		0	4	0024	-1	20ac	00a7		// DIGIT FOUR, DOLLAR SIGN, <none>, EURO
SIGN, SECTION SIGN
06	5		0	5	0025	-1	-1	00b0		// DIGIT FIVE, PERCENT SIGN, <none>,
<none>, DEGREE SIGN
07	6		0	6	005e	-1	2019	-1		// DIGIT SIX, CIRCUMFLEX ACCENT, <none>,
RIGHT SINGLE QUOTATION MARK, <none>
08	7		0	7	0026	-1	-1	00b1		// DIGIT SEVEN, AMPERSAND, <none>, <none>,
PLUS-MINUS SIGN
09	8		0	8	002a	-1	-1	00d7		// DIGIT EIGHT, ASTERISK, <none>, <none>,
MULTIPLICATION SIGN
0a	9		0	9	0028	-1	-1	-1		// DIGIT NINE, LEFT PARENTHESIS, <none>, <none>, <none>
0b	0		0	0	0029	-1	-1	-1		// DIGIT ZERO, RIGHT PARENTHESIS, <none>,
<none>, <none>
0c	OEM_MINUS	0	002d	005f	-1	2013	2014		// HYPHEN-MINUS, LOW LINE,
<none>, EN DASH, EM DASH
0d	OEM_PLUS	0	003d	002b	-1	-1	-1		// EQUALS SIGN, PLUS SIGN, <none>,
<none>, <none>
10	Q		1	q	Q	-1	-1	-1		// LATIN SMALL LETTER Q, LATIN CAPITAL LETTER Q,
<none>, <none>, <none>
11	W		1	w	W	-1	-1	-1		// LATIN SMALL LETTER W, LATIN CAPITAL LETTER W,
<none>, <none>, <none>
12	E		1	e	E	-1	0113	0112		// LATIN SMALL LETTER E, LATIN CAPITAL
LETTER E, <none>, LATIN SMALL LETTER E WITH MACRON, LATIN CAPITAL
LETTER E WITH MACRON
13	R		1	r	R	-1	0157	0156		// LATIN SMALL LETTER R, LATIN CAPITAL
LETTER R, <none>, LATIN SMALL LETTER R WITH CEDILLA, LATIN CAPITAL
LETTER R WITH CEDILLA
14	T		1	t	T	-1	-1	-1		// LATIN SMALL LETTER T, LATIN CAPITAL LETTER T,
<none>, <none>, <none>
15	Y		1	y	Y	-1	-1	-1		// LATIN SMALL LETTER Y, LATIN CAPITAL LETTER Y,
<none>, <none>, <none>
16	U		1	u	U	-1	016b	016a		// LATIN SMALL LETTER U, LATIN CAPITAL
LETTER U, <none>, LATIN SMALL LETTER U WITH MACRON, LATIN CAPITAL
LETTER U WITH MACRON
17	I		1	i	I	-1	012b	012a		// LATIN SMALL LETTER I, LATIN CAPITAL
LETTER I, <none>, LATIN SMALL LETTER I WITH MACRON, LATIN CAPITAL
LETTER I WITH MACRON
18	O		1	o	O	-1	00f5	00d5		// LATIN SMALL LETTER O, LATIN CAPITAL
LETTER O, <none>, LATIN SMALL LETTER O WITH TILDE, LATIN CAPITAL
LETTER O WITH TILDE
19	P		1	p	P	-1	-1	-1		// LATIN SMALL LETTER P, LATIN CAPITAL LETTER P,
<none>, <none>, <none>
1a	OEM_4		0	005b	007b	-1	-1	-1		// LEFT SQUARE BRACKET, LEFT CURLY
BRACKET, <none>, <none>, <none>
1b	OEM_6		0	005d	007d	-1	-1	-1		// RIGHT SQUARE BRACKET, RIGHT CURLY
BRACKET, <none>, <none>, <none>
1e	A		1	a	A	-1	0101	0100		// LATIN SMALL LETTER A, LATIN CAPITAL
LETTER A, <none>, LATIN SMALL LETTER A WITH MACRON, LATIN CAPITAL
LETTER A WITH MACRON
1f	S		1	s	S	-1	0161	0160		// LATIN SMALL LETTER S, LATIN CAPITAL
LETTER S, <none>, LATIN SMALL LETTER S WITH CARON, LATIN CAPITAL
LETTER S WITH CARON
20	D		1	d	D	-1	-1	-1		// LATIN SMALL LETTER D, LATIN CAPITAL LETTER D,
<none>, <none>, <none>
21	F		1	f	F	-1	-1	-1		// LATIN SMALL LETTER F, LATIN CAPITAL LETTER F,
<none>, <none>, <none>
22	G		1	g	G	-1	0123	0122		// LATIN SMALL LETTER G, LATIN CAPITAL
LETTER G, <none>, LATIN SMALL LETTER G WITH CEDILLA, LATIN CAPITAL
LETTER G WITH CEDILLA
23	H		1	h	H	-1	-1	-1		// LATIN SMALL LETTER H, LATIN CAPITAL LETTER H,
<none>, <none>, <none>
24	J		1	j	J	-1	-1	-1		// LATIN SMALL LETTER J, LATIN CAPITAL LETTER J,
<none>, <none>, <none>
25	K		1	k	K	-1	0137	0136		// LATIN SMALL LETTER K, LATIN CAPITAL
LETTER K, <none>, LATIN SMALL LETTER K WITH CEDILLA, LATIN CAPITAL
LETTER K WITH CEDILLA
26	L		1	l	L	-1	013c	013b		// LATIN SMALL LETTER L, LATIN CAPITAL
LETTER L, <none>, LATIN SMALL LETTER L WITH CEDILLA, LATIN CAPITAL
LETTER L WITH CEDILLA
27	OEM_1		0	003b	003a	-1	-1	-1		// SEMICOLON, COLON, <none>, <none>, <none>
28	OEM_7		0	0027	0022	-1	00b4@	00a8@		// APOSTROPHE, QUOTATION MARK,
<none>, ACUTE ACCENT, DIAERESIS
29	OEM_3		0	0060	007e@	-1	00ad	-1		// GRAVE ACCENT, TILDE, <none>,
SOFT HYPHEN, <none>
2b	OEM_5		0	00b0@	007c	-1	-1	-1		// DEGREE SIGN, VERTICAL LINE,
<none>, <none>, <none>
2c	Z		1	z	Z	-1	017e	017d		// LATIN SMALL LETTER Z, LATIN CAPITAL
LETTER Z, <none>, LATIN SMALL LETTER Z WITH CARON, LATIN CAPITAL
LETTER Z WITH CARON
2d	X		1	x	X	-1	-1	-1		// LATIN SMALL LETTER X, LATIN CAPITAL LETTER X,
<none>, <none>, <none>
2e	C		1	c	C	-1	010d	010c		// LATIN SMALL LETTER C, LATIN CAPITAL
LETTER C, <none>, LATIN SMALL LETTER C WITH CARON, LATIN CAPITAL
LETTER C WITH CARON
2f	V		1	v	V	-1	-1	-1		// LATIN SMALL LETTER V, LATIN CAPITAL LETTER V,
<none>, <none>, <none>
30	B		1	b	B	-1	-1	-1		// LATIN SMALL LETTER B, LATIN CAPITAL LETTER B,
<none>, <none>, <none>
31	N		1	n	N	-1	0146	0145		// LATIN SMALL LETTER N, LATIN CAPITAL
LETTER N, <none>, LATIN SMALL LETTER N WITH CEDILLA, LATIN CAPITAL
LETTER N WITH CEDILLA
32	M		1	m	M	-1	-1	-1		// LATIN SMALL LETTER M, LATIN CAPITAL LETTER M,
<none>, <none>, <none>
33	OEM_COMMA	0	002c	003c	-1	-1	-1		// COMMA, LESS-THAN SIGN, <none>,
<none>, <none>
34	OEM_PERIOD	0	002e	003e	-1	-1	-1		// FULL STOP, GREATER-THAN SIGN,
<none>, <none>, <none>
35	OEM_2		0	002f	003f	-1	-1	-1		// SOLIDUS, QUESTION MARK, <none>,
<none>, <none>
39	SPACE		0	0020	0020	0020	-1	-1		// SPACE, SPACE, SPACE, <none>, <none>
56	OEM_102	0	005c	007c	-1	-1	-1		// REVERSE SOLIDUS, VERTICAL LINE,
<none>, <none>, <none>
53	DECIMAL	0	002e	002e	-1	-1	-1		// FULL STOP, FULL STOP, , ,


DEADKEY	00b4

006e	0144	// n -> ń
0063	0107	// c -> ć
007a	017a	// z -> ź
0073	015b	// s -> ś
0065	00e9	// e -> é
006f	00f3	// o -> ó
004e	0143	// N -> Ń
0043	0106	// C -> Ć
005a	0179	// Z -> Ź
0053	015a	// S -> Ś
0045	00c9	// E -> É
004f	00d3	// O -> Ó
0020	00b4	//   -> ´

DEADKEY	00a8

0061	00e4	// a -> ä
0075	00fc	// u -> ü
006f	00f6	// o -> ö
0041	00c4	// A -> Ä
0055	00dc	// U -> Ü
004f	00d6	// O -> Ö
0020	00a8	//   -> ¨

DEADKEY	007e

006f	00f5	// o -> õ
004f	00d5	// O -> Õ
0020	007e	//   -> ~

DEADKEY	00b0

007a	017c	// z -> ż
0061	00e5	// a -> å
0067	0121	// g -> ġ
0065	0117	// e -> ė
005a	017b	// Z -> Ż
0041	00c5	// A -> Å
0045	0116	// E -> Ė
0020	00b0	//   -> °


KEYNAME

01	Esc
0e	Backspace
0f	Tab
1c	Enter
1d	Ctrl
2a	Shift
36	"Right Shift"
37	"Num *"
38	Alt
39	Space
3a	"Caps Lock"
3b	F1
3c	F2
3d	F3
3e	F4
3f	F5
40	F6
41	F7
42	F8
43	F9
44	F10
45	Pause
46	"Scroll Lock"
47	"Num 7"
48	"Num 8"
49	"Num 9"
4a	"Num -"
4b	"Num 4"
4c	"Num 5"
4d	"Num 6"
4e	"Num +"
4f	"Num 1"
50	"Num 2"
51	"Num 3"
52	"Num 0"
53	"Num Del"
54	"Sys Req"
57	F11
58	F12
7c	F13
7d	F14
7e	F15
7f	F16
80	F17
81	F18
82	F19
83	F20
84	F21
85	F22
86	F23
87	F24

KEYNAME_EXT

1c	"Num Enter"
1d	"Right Ctrl"
35	"Num /"
37	"Prnt Scrn"
38	"Right Alt"
45	"Num Lock"
46	Break
47	Home
48	Up
49	"Page Up"
4b	Left
4d	Right
4f	End
50	Down
51	"Page Down"
52	Insert
53	Delete
54	<00>
56	Help
5b	"Left Windows"
5c	"Right Windows"
5d	Application

KEYNAME_DEAD

00b4	"ACUTE ACCENT"
00a8	"DIAERESIS"
007e	"TILDE"
00b0	"DEGREE SIGN"


ENDKBD


The encoding of the file is Unicode. With find() I am able to locate
instances of individual characters, but not whole words.

Cheers,
Bryan Rasmussen


More information about the Python-list mailing list