Skip to content

Commit 2f8ee9e

Browse files
wrichert authored and luispedro committed
py3 compliant
1 parent c1881c9 commit 2f8ee9e

File tree

4 files changed

+46
-46
lines changed

4 files changed

+46
-46
lines changed

‎ch06/01_start.py‎

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ def train_model(clf_factory, X, Y, name="NB ngram", plot=False):
8383

8484
summary= (np.mean(scores), np.std(scores),
8585
np.mean(pr_scores), np.std(pr_scores))
86-
print"%.3f\t%.3f\t%.3f\t%.3f\t"%summary
86+
print("%.3f\t%.3f\t%.3f\t%.3f\t"%summary)
8787

8888
returnnp.mean(train_errors), np.mean(test_errors)
8989

@@ -94,38 +94,38 @@ def print_incorrect(clf, X, Y):
9494
X_wrong=X[wrong_idx]
9595
Y_wrong=Y[wrong_idx]
9696
Y_hat_wrong=Y_hat[wrong_idx]
97-
foridxinxrange(len(X_wrong)):
98-
print"clf.predict('%s')=%i instead of %i"%\
99-
(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
97+
foridxinrange(len(X_wrong)):
98+
print("clf.predict('%s')=%i instead of %i"%
99+
(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
100100

101101

102102
if__name__=="__main__":
103103
X_orig, Y_orig=load_sanders_data()
104104
classes=np.unique(Y_orig)
105105
forcinclasses:
106-
print"#%s: %i"% (c, sum(Y_orig==c))
106+
print("#%s: %i"% (c, sum(Y_orig==c)))
107107

108-
print"== Pos vs. neg =="
108+
print("== Pos vs. neg ==")
109109
pos_neg=np.logical_or(Y_orig=="positive", Y_orig=="negative")
110110
X=X_orig[pos_neg]
111111
Y=Y_orig[pos_neg]
112112
Y=tweak_labels(Y, ["positive"])
113113

114114
train_model(create_ngram_model, X, Y, name="pos vs neg", plot=True)
115115

116-
print"== Pos/neg vs. irrelevant/neutral =="
116+
print("== Pos/neg vs. irrelevant/neutral ==")
117117
X=X_orig
118118
Y=tweak_labels(Y_orig, ["positive", "negative"])
119119
train_model(create_ngram_model, X, Y, name="sent vs rest", plot=True)
120120

121-
print"== Pos vs. rest =="
121+
print("== Pos vs. rest ==")
122122
X=X_orig
123123
Y=tweak_labels(Y_orig, ["positive"])
124124
train_model(create_ngram_model, X, Y, name="pos vs rest", plot=True)
125125

126-
print"== Neg vs. rest =="
126+
print("== Neg vs. rest ==")
127127
X=X_orig
128128
Y=tweak_labels(Y_orig, ["negative"])
129129
train_model(create_ngram_model, X, Y, name="neg vs rest", plot=True)
130130

131-
print"time spent:", time.time() -start_time
131+
print("time spent:", time.time() -start_time)

‎ch06/02_tuning.py‎

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ def grid_search_model(clf_factory, X, Y):
6464
verbose=10)
6565
grid_search.fit(X, Y)
6666
clf=grid_search.best_estimator_
67-
printclf
67+
print(clf)
6868

6969
returnclf
7070

@@ -114,7 +114,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
114114

115115
summary= (np.mean(scores), np.std(scores),
116116
np.mean(pr_scores), np.std(pr_scores))
117-
print"%.3f\t%.3f\t%.3f\t%.3f\t"%summary
117+
print("%.3f\t%.3f\t%.3f\t%.3f\t"%summary)
118118

119119
returnnp.mean(train_errors), np.mean(test_errors)
120120

@@ -125,9 +125,9 @@ def print_incorrect(clf, X, Y):
125125
X_wrong=X[wrong_idx]
126126
Y_wrong=Y[wrong_idx]
127127
Y_hat_wrong=Y_hat[wrong_idx]
128-
foridxinxrange(len(X_wrong)):
129-
print"clf.predict('%s')=%i instead of %i"%\
130-
(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
128+
foridxinrange(len(X_wrong)):
129+
print("clf.predict('%s')=%i instead of %i"%
130+
(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
131131

132132

133133
defget_best_model():
@@ -149,33 +149,33 @@ def get_best_model():
149149
X_orig, Y_orig=load_sanders_data()
150150
classes=np.unique(Y_orig)
151151
forcinclasses:
152-
print"#%s: %i"% (c, sum(Y_orig==c))
152+
print("#%s: %i"% (c, sum(Y_orig==c)))
153153

154-
print"== Pos vs. neg =="
154+
print("== Pos vs. neg ==")
155155
pos_neg=np.logical_or(Y_orig=="positive", Y_orig=="negative")
156156
X=X_orig[pos_neg]
157157
Y=Y_orig[pos_neg]
158158
Y=tweak_labels(Y, ["positive"])
159159
train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
160160

161-
print"== Pos/neg vs. irrelevant/neutral =="
161+
print("== Pos/neg vs. irrelevant/neutral ==")
162162
X=X_orig
163163
Y=tweak_labels(Y_orig, ["positive", "negative"])
164164

165165
# best_clf = grid_search_model(create_ngram_model, X, Y, name="sent vs
166166
# rest", plot=True)
167167
train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
168168

169-
print"== Pos vs. rest =="
169+
print("== Pos vs. rest ==")
170170
X=X_orig
171171
Y=tweak_labels(Y_orig, ["positive"])
172172
train_model(get_best_model(), X, Y, name="pos vs rest",
173173
plot=True)
174174

175-
print"== Neg vs. rest =="
175+
print("== Neg vs. rest ==")
176176
X=X_orig
177177
Y=tweak_labels(Y_orig, ["negative"])
178178
train_model(get_best_model(), X, Y, name="neg vs rest",
179179
plot=True)
180180

181-
print"time spent:", time.time() -start_time
181+
print("time spent:", time.time() -start_time)

‎ch06/03_clean.py‎

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@
5757
}
5858

5959
emo_repl_order= [kfor (k_len, k) inreversed(
60-
sorted([(len(k), k) forkinemo_repl.keys()]))]
60+
sorted([(len(k), k) forkinlist(emo_repl.keys())]))]
6161

6262
re_repl={
6363
r"\br\b": "are",
@@ -84,7 +84,7 @@ def preprocessor(tweet):
8484

8585
forkinemo_repl_order:
8686
tweet=tweet.replace(k, emo_repl[k])
87-
forr, replinre_repl.iteritems():
87+
forr, replinre_repl.items():
8888
tweet=re.sub(r, repl, tweet)
8989

9090
returntweet
@@ -150,7 +150,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
150150

151151
summary= (np.mean(scores), np.std(scores),
152152
np.mean(pr_scores), np.std(pr_scores))
153-
print"%.3f\t%.3f\t%.3f\t%.3f\t"%summary
153+
print("%.3f\t%.3f\t%.3f\t%.3f\t"%summary)
154154

155155
returnnp.mean(train_errors), np.mean(test_errors)
156156

@@ -161,9 +161,9 @@ def print_incorrect(clf, X, Y):
161161
X_wrong=X[wrong_idx]
162162
Y_wrong=Y[wrong_idx]
163163
Y_hat_wrong=Y_hat[wrong_idx]
164-
foridxinxrange(len(X_wrong)):
165-
print"clf.predict('%s')=%i instead of %i"%\
166-
(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
164+
foridxinrange(len(X_wrong)):
165+
print("clf.predict('%s')=%i instead of %i"%
166+
(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
167167

168168

169169
defget_best_model():
@@ -185,33 +185,33 @@ def get_best_model():
185185
X_orig, Y_orig=load_sanders_data()
186186
classes=np.unique(Y_orig)
187187
forcinclasses:
188-
print"#%s: %i"% (c, sum(Y_orig==c))
188+
print("#%s: %i"% (c, sum(Y_orig==c)))
189189

190-
print"== Pos vs. neg =="
190+
print("== Pos vs. neg ==")
191191
pos_neg=np.logical_or(Y_orig=="positive", Y_orig=="negative")
192192
X=X_orig[pos_neg]
193193
Y=Y_orig[pos_neg]
194194
Y=tweak_labels(Y, ["positive"])
195195
train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
196196

197-
print"== Pos/neg vs. irrelevant/neutral =="
197+
print("== Pos/neg vs. irrelevant/neutral ==")
198198
X=X_orig
199199
Y=tweak_labels(Y_orig, ["positive", "negative"])
200200

201201
# best_clf = grid_search_model(create_union_model, X, Y, name="sent vs
202202
# rest", plot=True)
203203
train_model(get_best_model(), X, Y, name="pos+neg vs rest", plot=True)
204204

205-
print"== Pos vs. rest =="
205+
print("== Pos vs. rest ==")
206206
X=X_orig
207207
Y=tweak_labels(Y_orig, ["positive"])
208208
train_model(get_best_model(), X, Y, name="pos vs rest",
209209
plot=True)
210210

211-
print"== Neg vs. rest =="
211+
print("== Neg vs. rest ==")
212212
X=X_orig
213213
Y=tweak_labels(Y_orig, ["negative"])
214214
train_model(get_best_model(), X, Y, name="neg vs rest",
215215
plot=True)
216216

217-
print"time spent:", time.time() -start_time
217+
print("time spent:", time.time() -start_time)

‎ch06/04_sent.py‎

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,7 @@ def transform(self, documents):
153153
}
154154

155155
emo_repl_order= [kfor (k_len, k) inreversed(
156-
sorted([(len(k), k) forkinemo_repl.keys()]))]
156+
sorted([(len(k), k) forkinlist(emo_repl.keys())]))]
157157

158158
re_repl={
159159
r"\br\b": "are",
@@ -179,7 +179,7 @@ def preprocessor(tweet):
179179

180180
forkinemo_repl_order:
181181
tweet=tweet.replace(k, emo_repl[k])
182-
forr, replinre_repl.iteritems():
182+
forr, replinre_repl.items():
183183
tweet=re.sub(r, repl, tweet)
184184

185185
returntweet.replace("-", " ").replace("_", " ")
@@ -220,7 +220,7 @@ def __grid_search_model(clf_factory, X, Y):
220220
verbose=10)
221221
grid_search.fit(X, Y)
222222
clf=grid_search.best_estimator_
223-
printclf
223+
print(clf)
224224

225225
returnclf
226226

@@ -275,7 +275,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
275275

276276
summary= (np.mean(scores), np.std(scores),
277277
np.mean(pr_scores), np.std(pr_scores))
278-
print"%.3f\t%.3f\t%.3f\t%.3f\t"%summary
278+
print("%.3f\t%.3f\t%.3f\t%.3f\t"%summary)
279279

280280
returnnp.mean(train_errors), np.mean(test_errors)
281281

@@ -286,9 +286,9 @@ def print_incorrect(clf, X, Y):
286286
X_wrong=X[wrong_idx]
287287
Y_wrong=Y[wrong_idx]
288288
Y_hat_wrong=Y_hat[wrong_idx]
289-
foridxinxrange(len(X_wrong)):
290-
print"clf.predict('%s')=%i instead of %i"%\
291-
(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
289+
foridxinrange(len(X_wrong)):
290+
print("clf.predict('%s')=%i instead of %i"%
291+
(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
292292

293293

294294
defget_best_model():
@@ -315,35 +315,35 @@ def get_best_model():
315315
#Y_orig = Y_orig[:100,]
316316
classes=np.unique(Y_orig)
317317
forcinclasses:
318-
print"#%s: %i"% (c, sum(Y_orig==c))
318+
print("#%s: %i"% (c, sum(Y_orig==c)))
319319

320-
print"== Pos vs. neg =="
320+
print("== Pos vs. neg ==")
321321
pos_neg=np.logical_or(Y_orig=="positive", Y_orig=="negative")
322322
X=X_orig[pos_neg]
323323
Y=Y_orig[pos_neg]
324324
Y=tweak_labels(Y, ["positive"])
325325
train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
326326

327-
print"== Pos/neg vs. irrelevant/neutral =="
327+
print("== Pos/neg vs. irrelevant/neutral ==")
328328
X=X_orig
329329
Y=tweak_labels(Y_orig, ["positive", "negative"])
330330

331331
# best_clf = grid_search_model(create_union_model, X, Y, name="sent vs
332332
# rest", plot=True)
333333
train_model(get_best_model(), X, Y, name="pos+neg vs rest", plot=True)
334334

335-
print"== Pos vs. rest =="
335+
print("== Pos vs. rest ==")
336336
X=X_orig
337337
Y=tweak_labels(Y_orig, ["positive"])
338338
train_model(get_best_model(), X, Y, name="pos vs rest",
339339
plot=True)
340340

341-
print"== Neg vs. rest =="
341+
print("== Neg vs. rest ==")
342342
X=X_orig
343343
Y=tweak_labels(Y_orig, ["negative"])
344344
train_model(get_best_model(), X, Y, name="neg vs rest",
345345
plot=True)
346346

347-
print"time spent:", time.time() -start_time
347+
print("time spent:", time.time() -start_time)
348348

349349
json.dump(poscache, open(poscache_filename, "w"))

0 commit comments

Comments
(0)