Skip to content

Commit 5d33b50

Browse files
committed
Moar speed: fast path for :;{}()[] tokens
1 parent cffc0ea commit 5d33b50

File tree

3 files changed

+53
-45
lines changed

3 files changed

+53
-45
lines changed

‎tinycss/speedups.pyx‎

Lines changed: 21 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -89,22 +89,28 @@ def tokenize_flat(css_source, int ignore_comments=1):
8989

9090
tokens = []
9191
while pos < source_len:
92-
for type_ in xrange(n_tokens):
93-
type_name, regexp = compiled_tokens[type_]
94-
match = regexp(css_source, pos)
95-
if match:
96-
# First match is the longest. See comments on TOKENS above.
97-
css_value = match.group()
98-
break
92+
char= css_source[pos]
93+
if char in ':;{}()[]':
94+
type_ = -1  # not parsed further anyway
95+
type_name =char
96+
css_value =char
9997
else:
100-
# No match.
101-
# "Any other character not matched by the above rules,
102-
# and neither a single nor a double quote."
103-
# ... but quotes at the start of a token are always matched
104-
# by STRING or BAD_STRING. So DELIM is any single character.
105-
type_ = DELIM
106-
type_name ='DELIM'
107-
css_value = css_source[pos]
98+
for type_ in xrange(n_tokens):
99+
type_name, regexp = compiled_tokens[type_]
100+
match = regexp(css_source, pos)
101+
if match:
102+
# First match is the longest. See comments on TOKENS above.
103+
css_value = match.group()
104+
break
105+
else:
106+
# No match.
107+
# "Any other character not matched by the above rules,
108+
# and neither a single nor a double quote."
109+
# ... but quotes at the start of a token are always matched
110+
# by STRING or BAD_STRING. So DELIM is any single character.
111+
type_ = DELIM
112+
type_name ='DELIM'
113+
css_value =char
108114
length =len(css_value)
109115
next_pos = pos + length
110116

‎tinycss/token_data.py‎

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -118,29 +118,26 @@
118118
def _init():
119119
"""Import-time initialization."""
120120
COMPILED_MACROS.clear()
121-
# Formatter is broken on PyPy: https://bugs.pypy.org/issue1081
122-
# expand_macros = functools.partial(
123-
# Formatter().vformat, args=(), kwargs=COMPILED_MACROS)
124-
125121
for line in MACROS.splitlines():
126122
if line.strip():
127123
name, value=line.split('\t')
128124
COMPILED_MACROS[name.strip()] ='(?:%s)' \
129125
%value.format(**COMPILED_MACROS)
130126

131-
del COMPILED_TOKEN_REGEXPS[:]
132-
for line in TOKENS.splitlines():
133-
if line.strip():
134-
name, value=line.split('\t')
135-
COMPILED_TOKEN_REGEXPS.append((
136-
name.strip(),
137-
re.compile(
138-
value.format(**COMPILED_MACROS),
139-
# Case-insensitive when matching eg. uRL(foo)
140-
# but preserve the case in extracted groups
141-
re.I
142-
).match
143-
))
127+
COMPILED_TOKEN_REGEXPS[:] = (
128+
(
129+
name.strip(),
130+
re.compile(
131+
value.format(**COMPILED_MACROS),
132+
# Case-insensitive when matching eg. uRL(foo)
133+
# but preserve the case in extracted groups
134+
re.I
135+
).match
136+
)
137+
for line in TOKENS.splitlines()
138+
if line.strip()
139+
for name, value in [line.split('\t')]
140+
)
144141

145142
COMPILED_TOKEN_INDEXES.clear()
146143
for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS):

‎tinycss/tokenizer.py‎

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -48,20 +48,25 @@ def tokenize_flat(css_source, ignore_comments=True,
4848
source_len=len(css_source)
4949
tokens= []
5050
while pos < source_len:
51-
for type_, regexp in compiled_tokens:
52-
match=regexp(css_source, pos)
53-
if match:
54-
# First match is the longest. See comments on TOKENS above.
55-
css_value=match.group()
56-
break
51+
char=css_source[pos]
52+
if char in ':;{}()[]':
53+
type_=char
54+
css_value=char
5755
else:
58-
# No match.
59-
# "Any other character not matched by the above rules,
60-
# and neither a single nor a double quote."
61-
# ... but quotes at the start of a token are always matched
62-
# by STRING or BAD_STRING. So DELIM is any single character.
63-
type_='DELIM'
64-
css_value=css_source[pos]
56+
for type_, regexp in compiled_tokens:
57+
match=regexp(css_source, pos)
58+
if match:
59+
# First match is the longest. See comments on TOKENS above.
60+
css_value=match.group()
61+
break
62+
else:
63+
# No match.
64+
# "Any other character not matched by the above rules,
65+
# and neither a single nor a double quote."
66+
# ... but quotes at the start of a token are always matched
67+
# by STRING or BAD_STRING. So DELIM is any single character.
68+
type_='DELIM'
69+
css_value=char
6570
length=len(css_value)
6671
next_pos=pos+length
6772

0 commit comments

Comments
(0)