以下是所有安装和导入:
!pip install wordcloud
!pip install fileupload
!pip install ipywidgets
!jupyter nbextension install --py --user fileupload
!jupyter nbextension enable --py fileupload
import wordcloud
import numpy as np
from matplotlib import pyplot as plt
from IPython.display import display
import fileupload
import io
import sys
这是上载器小部件:
from IPython.display import display
import fileupload
# Create the file-upload widget; it is wired to a callback and displayed below.
uploader = fileupload.FileUploadWidget()
def _handle_upload(change):
    """Save an uploaded file to disk and publish its text as ``file_contents``.

    Args:
        change: Change notification passed by the widget. ``change['owner']``
            must provide ``filename`` (destination path) and ``data`` (the
            uploaded bytes).

    Side effects:
        Writes the bytes to ``filename``, sets the module-level global
        ``file_contents`` to the decoded text, and prints a one-line summary.
    """
    global file_contents
    w = change['owner']
    with open(w.filename, 'wb') as f:
        f.write(w.data)
    # The display cell calls calculate_frequencies(file_contents); without this
    # assignment that name is never defined and the cell fails with NameError.
    # errors='replace' keeps a non-UTF-8 upload from raising inside the
    # callback after the file has already been saved.
    file_contents = w.data.decode('utf-8', errors='replace')
    print('Uploaded `{}` ({:.2f} kB)'.format(
        w.filename, len(w.data) / 2**10))
# Run _handle_upload whenever the widget's `data` value changes,
# then render the widget in the notebook output.
uploader.observe(_handle_upload, names='data')
display(uploader)
Wordcloud程序:
def calculate_frequencies(file_contents):
    """Build a word-cloud image from the words found in ``file_contents``.

    Punctuation characters are deleted, the text is split on whitespace, and
    every purely alphabetic word that is not a stop word is counted. The
    counts are handed to ``wordcloud.WordCloud.generate_from_frequencies``.

    Args:
        file_contents: The text to analyse, as a single string.

    Returns:
        The value of ``WordCloud.to_array()`` for the generated cloud.
    """
    # Raw string: the backslash is itself one of the characters to strip, and
    # the raw form avoids the invalid "\," escape sequence.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    # A set gives constant-time membership tests in the loop below.
    uninteresting_words = {
        "the", "a", "to", "if", "is", "it", "of", "and", "or", "an", "as",
        "i", "me", "my", "we", "our", "ours", "you", "your", "yours", "he",
        "she", "him", "his", "her", "hers", "its", "they", "them", "their",
        "what", "which", "who", "whom", "this", "that", "am", "are", "was",
        "were", "be", "been", "being", "have", "has", "had", "do", "does",
        "did", "but", "at", "by", "with", "from", "here", "when", "where",
        "how", "all", "any", "both", "each", "few", "more", "some", "such",
        "no", "nor", "too", "very", "can", "will", "just",
    }

    # Delete (not replace) every punctuation character in a single pass.
    stripped_text = file_contents.translate(str.maketrans('', '', punctuations))

    frequencies = {}
    for word in stripped_text.split():
        # The stop list is lower case, so compare case-insensitively;
        # otherwise "The", "It" or "I" slip past the filter. The word keeps
        # its original spelling as the dictionary key.
        if word.isalpha() and word.lower() not in uninteresting_words:
            frequencies[word] = frequencies.get(word, 0) + 1

    # wordcloud
    cloud = wordcloud.WordCloud()
    cloud.generate_from_frequencies(frequencies)
    return cloud.to_array()
显示我们的wordcloud图片:
# Display the word cloud image.
# `file_contents` may not have been assigned by an earlier cell; when it is
# missing, take the text from the uploader widget -- the same `data` bytes
# that _handle_upload writes to disk.
if 'file_contents' not in globals():
    if not uploader.data:
        raise RuntimeError(
            'No file has been uploaded yet: use the uploader widget first.')
    file_contents = uploader.data.decode('utf-8')

myimage = calculate_frequencies(file_contents)
plt.imshow(myimage, interpolation = 'nearest')
plt.axis('off')
plt.show()
错误信息:
NameError Traceback (most recent call last)
<ipython-input-2-fd0f708f372c> in <module>
1 # Display your wordcloud image
2
----> 3 myimage = calculate_frequencies(file_contents)
4 plt.imshow(myimage, interpolation = 'nearest')
5 plt.axis('off')
NameError: name 'file_contents' is not defined
我的 `file_contents` 参数缺少了什么?为什么不显示 wordcloud?
我尝试通过 `uploader` 小部件上传不同的 `.txt` 文档,但没有帮助。
请您参考如下方法:
您必须将文件/文本添加为calculate_frequencies函数的输入。
Here is an example.
# Pass the text to analyse directly as the function argument.
# Adjacent string literals are joined into one string, so the paragraph can
# span several source lines without breaking the literal.
myimage = calculate_frequencies(
    "Humpty Dumpty is a character in "
    "an English nursery rhyme, probably originally a riddle and one of the "
    "best known in the English-speaking world. He is typically portrayed "
    "as an anthropomorphic egg, though he is not explicitly described "
    "as such. "
)
plt.imshow(myimage, interpolation = 'nearest')
plt.axis('off')
plt.show()
只需从任意网站复制粘贴一段文字,就能看到词云生动地呈现出来了。
:)