Skip to content

Commit 30a1512

Browse files
committed
ENH Use matutils.corpus2dense instead of looping
1 parent cd69e60 commit 30a1512

File tree

1 file changed

+4
-8
lines changed

1 file changed

+4
-8
lines changed

‎ch04/blei_lda.py‎

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from __future__ import print_function
99
from wordcloud import create_cloud
1010
try:
11-
from gensim import corpora, models
11+
from gensim import corpora, models, matutils
1212
except:
1313
print("import gensim failed.")
1414
print()
@@ -44,15 +44,11 @@
4444
# We first identify the most discussed topic, i.e., the one with the
4545
# highest total weight
4646

47-
# First, we need to sum up the weights across all the documents
48-
weight = np.zeros(model.num_topics)
49-
for doc in corpus:
50-
for col, val in model[doc]:
51-
weight[col] += val
52-
# As a reasonable alternative, we could have used the log of val:
53-
# weight[col] += np.log(val)
47+
topics = matutils.corpus2dense(model[corpus], num_terms=model.num_topics)
48+
weight = topics.sum(1)
5449
max_topic = weight.argmax()
5550

51+
5652
# Get the top 64 words for this topic
5753
# Without the argument, show_topic would return only 10 words
5854
words = model.show_topic(max_topic, 64)

0 commit comments

Comments
(0)