py3 compliant

wrichert · luispedro · commit 2f8ee9e5a62a · 2015-03-25T20:22:25.000+01:00
diff --git a/ch06/01_start.py b/ch06/01_start.py
@@ -83,7 +83,7 @@ def train_model(clf_factory, X, Y, name="NB ngram", plot=False):
 
 summary = (np.mean(scores), np.std(scores),
 np.mean(pr_scores), np.std(pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
 
 return np.mean(train_errors), np.mean(test_errors)
 
@@ -94,38 +94,38 @@ def print_incorrect(clf, X, Y):
 X_wrong = X[wrong_idx]
 Y_wrong = Y[wrong_idx]
 Y_hat_wrong = Y_hat[wrong_idx]
- for idx in xrange(len(X_wrong)):
- print "clf.predict('%s')=%i instead of %i" %\
- (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
+ for idx in range(len(X_wrong)):
+ print("clf.predict('%s')=%i instead of %i" %
+ (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
 
 
 if __name__ == "__main__":
 X_orig, Y_orig = load_sanders_data()
 classes = np.unique(Y_orig)
 for c in classes:
- print "#%s: %i" % (c, sum(Y_orig == c))
+ print("#%s: %i" % (c, sum(Y_orig == c)))
 
- print "== Pos vs. neg =="
+ print("== Pos vs. neg ==")
 pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
 X = X_orig[pos_neg]
 Y = Y_orig[pos_neg]
 Y = tweak_labels(Y, ["positive"])
 
 train_model(create_ngram_model, X, Y, name="pos vs neg", plot=True)
 
- print "== Pos/neg vs. irrelevant/neutral =="
+ print("== Pos/neg vs. irrelevant/neutral ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive", "negative"])
 train_model(create_ngram_model, X, Y, name="sent vs rest", plot=True)
 
- print "== Pos vs. rest =="
+ print("== Pos vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive"])
 train_model(create_ngram_model, X, Y, name="pos vs rest", plot=True)
 
- print "== Neg vs. rest =="
+ print("== Neg vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["negative"])
 train_model(create_ngram_model, X, Y, name="neg vs rest", plot=True)
 
- print "time spent:", time.time() - start_time
+ print("time spent:", time.time() - start_time)
diff --git a/ch06/02_tuning.py b/ch06/02_tuning.py
@@ -64,7 +64,7 @@ def grid_search_model(clf_factory, X, Y):
 verbose=10)
 grid_search.fit(X, Y)
 clf = grid_search.best_estimator_
- print clf
+ print(clf)
 
 return clf
 
@@ -114,7 +114,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
 
 summary = (np.mean(scores), np.std(scores),
 np.mean(pr_scores), np.std(pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
 
 return np.mean(train_errors), np.mean(test_errors)
 
@@ -125,9 +125,9 @@ def print_incorrect(clf, X, Y):
 X_wrong = X[wrong_idx]
 Y_wrong = Y[wrong_idx]
 Y_hat_wrong = Y_hat[wrong_idx]
- for idx in xrange(len(X_wrong)):
- print "clf.predict('%s')=%i instead of %i" %\
- (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
+ for idx in range(len(X_wrong)):
+ print("clf.predict('%s')=%i instead of %i" %
+ (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
 
 
 def get_best_model():
@@ -149,33 +149,33 @@ def get_best_model():
 X_orig, Y_orig = load_sanders_data()
 classes = np.unique(Y_orig)
 for c in classes:
- print "#%s: %i" % (c, sum(Y_orig == c))
+ print("#%s: %i" % (c, sum(Y_orig == c)))
 
- print "== Pos vs. neg =="
+ print("== Pos vs. neg ==")
 pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
 X = X_orig[pos_neg]
 Y = Y_orig[pos_neg]
 Y = tweak_labels(Y, ["positive"])
 train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
 
- print "== Pos/neg vs. irrelevant/neutral =="
+ print("== Pos/neg vs. irrelevant/neutral ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive", "negative"])
 
 # best_clf = grid_search_model(create_ngram_model, X, Y, name="sent vs
 # rest", plot=True)
 train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
 
- print "== Pos vs. rest =="
+ print("== Pos vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive"])
 train_model(get_best_model(), X, Y, name="pos vs rest",
 plot=True)
 
- print "== Neg vs. rest =="
+ print("== Neg vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["negative"])
 train_model(get_best_model(), X, Y, name="neg vs rest",
 plot=True)
 
- print "time spent:", time.time() - start_time
+ print("time spent:", time.time() - start_time)
diff --git a/ch06/03_clean.py b/ch06/03_clean.py
@@ -57,7 +57,7 @@
 }
 
 emo_repl_order = [k for (k_len, k) in reversed(
- sorted([(len(k), k) for k in emo_repl.keys()]))]
+ sorted([(len(k), k) for k in list(emo_repl.keys())]))]
 
 re_repl ={
 r"\br\b": "are",
@@ -84,7 +84,7 @@ def preprocessor(tweet):
 
 for k in emo_repl_order:
 tweet = tweet.replace(k, emo_repl[k])
- for r, repl in re_repl.iteritems():
+ for r, repl in re_repl.items():
 tweet = re.sub(r, repl, tweet)
 
 return tweet
@@ -150,7 +150,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
 
 summary = (np.mean(scores), np.std(scores),
 np.mean(pr_scores), np.std(pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
 
 return np.mean(train_errors), np.mean(test_errors)
 
@@ -161,9 +161,9 @@ def print_incorrect(clf, X, Y):
 X_wrong = X[wrong_idx]
 Y_wrong = Y[wrong_idx]
 Y_hat_wrong = Y_hat[wrong_idx]
- for idx in xrange(len(X_wrong)):
- print "clf.predict('%s')=%i instead of %i" %\
- (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
+ for idx in range(len(X_wrong)):
+ print("clf.predict('%s')=%i instead of %i" %
+ (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
 
 
 def get_best_model():
@@ -185,33 +185,33 @@ def get_best_model():
 X_orig, Y_orig = load_sanders_data()
 classes = np.unique(Y_orig)
 for c in classes:
- print "#%s: %i" % (c, sum(Y_orig == c))
+ print("#%s: %i" % (c, sum(Y_orig == c)))
 
- print "== Pos vs. neg =="
+ print("== Pos vs. neg ==")
 pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
 X = X_orig[pos_neg]
 Y = Y_orig[pos_neg]
 Y = tweak_labels(Y, ["positive"])
 train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
 
- print "== Pos/neg vs. irrelevant/neutral =="
+ print("== Pos/neg vs. irrelevant/neutral ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive", "negative"])
 
 # best_clf = grid_search_model(create_union_model, X, Y, name="sent vs
 # rest", plot=True)
 train_model(get_best_model(), X, Y, name="pos+neg vs rest", plot=True)
 
- print "== Pos vs. rest =="
+ print("== Pos vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive"])
 train_model(get_best_model(), X, Y, name="pos vs rest",
 plot=True)
 
- print "== Neg vs. rest =="
+ print("== Neg vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["negative"])
 train_model(get_best_model(), X, Y, name="neg vs rest",
 plot=True)
 
- print "time spent:", time.time() - start_time
+ print("time spent:", time.time() - start_time)
diff --git a/ch06/04_sent.py b/ch06/04_sent.py
@@ -153,7 +153,7 @@ def transform(self, documents):
 }
 
 emo_repl_order = [k for (k_len, k) in reversed(
- sorted([(len(k), k) for k in emo_repl.keys()]))]
+ sorted([(len(k), k) for k in list(emo_repl.keys())]))]
 
 re_repl ={
 r"\br\b": "are",
@@ -179,7 +179,7 @@ def preprocessor(tweet):
 
 for k in emo_repl_order:
 tweet = tweet.replace(k, emo_repl[k])
- for r, repl in re_repl.iteritems():
+ for r, repl in re_repl.items():
 tweet = re.sub(r, repl, tweet)
 
 return tweet.replace("-", " ").replace("_", " ")
@@ -220,7 +220,7 @@ def __grid_search_model(clf_factory, X, Y):
 verbose=10)
 grid_search.fit(X, Y)
 clf = grid_search.best_estimator_
- print clf
+ print(clf)
 
 return clf
 
@@ -275,7 +275,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
 
 summary = (np.mean(scores), np.std(scores),
 np.mean(pr_scores), np.std(pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
 
 return np.mean(train_errors), np.mean(test_errors)
 
@@ -286,9 +286,9 @@ def print_incorrect(clf, X, Y):
 X_wrong = X[wrong_idx]
 Y_wrong = Y[wrong_idx]
 Y_hat_wrong = Y_hat[wrong_idx]
- for idx in xrange(len(X_wrong)):
- print "clf.predict('%s')=%i instead of %i" %\
- (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
+ for idx in range(len(X_wrong)):
+ print("clf.predict('%s')=%i instead of %i" %
+ (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
 
 
 def get_best_model():
@@ -315,35 +315,35 @@ def get_best_model():
 #Y_orig = Y_orig[:100,]
 classes = np.unique(Y_orig)
 for c in classes:
- print "#%s: %i" % (c, sum(Y_orig == c))
+ print("#%s: %i" % (c, sum(Y_orig == c)))
 
- print "== Pos vs. neg =="
+ print("== Pos vs. neg ==")
 pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
 X = X_orig[pos_neg]
 Y = Y_orig[pos_neg]
 Y = tweak_labels(Y, ["positive"])
 train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
 
- print "== Pos/neg vs. irrelevant/neutral =="
+ print("== Pos/neg vs. irrelevant/neutral ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive", "negative"])
 
 # best_clf = grid_search_model(create_union_model, X, Y, name="sent vs
 # rest", plot=True)
 train_model(get_best_model(), X, Y, name="pos+neg vs rest", plot=True)
 
- print "== Pos vs. rest =="
+ print("== Pos vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive"])
 train_model(get_best_model(), X, Y, name="pos vs rest",
 plot=True)
 
- print "== Neg vs. rest =="
+ print("== Neg vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["negative"])
 train_model(get_best_model(), X, Y, name="neg vs rest",
 plot=True)
 
- print "time spent:", time.time() - start_time
+ print("time spent:", time.time() - start_time)
 
 json.dump(poscache, open(poscache_filename, "w"))

-Original file line number
+Diff line change
 summary = (np.mean(scores), np.std(scores),
 np.mean(pr_scores), np.std(pr_scores))
 -print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
 +print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
 return np.mean(train_errors), np.mean(test_errors)
 X_wrong = X[wrong_idx]
 Y_wrong = Y[wrong_idx]
 Y_hat_wrong = Y_hat[wrong_idx]
 -for idx in xrange(len(X_wrong)):
 -print "clf.predict('%s')=%i instead of %i" %\
 - (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
 +for idx in range(len(X_wrong)):
 +print("clf.predict('%s')=%i instead of %i" %
 +(X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
 if __name__ == "__main__":
 X_orig, Y_orig = load_sanders_data()
 classes = np.unique(Y_orig)
 for c in classes:
 -print "#%s: %i" % (c, sum(Y_orig == c))
 +print("#%s: %i" % (c, sum(Y_orig == c)))
 -print "== Pos vs. neg =="
 +print("== Pos vs. neg ==")
 pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
 X = X_orig[pos_neg]
 Y = Y_orig[pos_neg]
 Y = tweak_labels(Y, ["positive"])
 train_model(create_ngram_model, X, Y, name="pos vs neg", plot=True)
 -print "== Pos/neg vs. irrelevant/neutral =="
 +print("== Pos/neg vs. irrelevant/neutral ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive", "negative"])
 train_model(create_ngram_model, X, Y, name="sent vs rest", plot=True)
 -print "== Pos vs. rest =="
 +print("== Pos vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["positive"])
 train_model(create_ngram_model, X, Y, name="pos vs rest", plot=True)
 -print "== Neg vs. rest =="
 +print("== Neg vs. rest ==")
 X = X_orig
 Y = tweak_labels(Y_orig, ["negative"])
 train_model(create_ngram_model, X, Y, name="neg vs rest", plot=True)
 -print "time spent:", time.time() - start_time
 +print("time spent:", time.time() - start_time)