①结果呈现②特征提取③分词④数据分析⑤文本数据获取
正确的顺序是( )
其中用作文本的特征项的是( )
请回答下列问题:
txt = open('chjfc.txt', 'r', encoding='utf-8').read()
words = txt.split()
word_counts = {}
for word in words:
    if len(word) == 1:
        continue
    else:
        ①    #可以为1行或多行代码
        #使用word_counts计算词语word在words中出现的次数
word_list = list(word_counts.items())    #返回所有键值对信息,生成列表
word_list.sort(key=lambda x: x[1], reverse=True)    #按词语出现次数降序排序
for i in range(20):
    word, times = ②
    print(word, times)
程序中划线①处应填写的代码是
程序中划线②处应填写的代码是
①搜索引擎②自动摘要③论文查重④成绩查询⑤自动应答
data.txt 记事本
他处理“data.txt”文件中英文单词的Python程序段如下:
file='data.txt'
word_c=[]
n=0
for word in open(file):
    if word[0:1]=='c':
        word_c.append(word)
        print(word)
print('字母c开头的单词个数:',n)
import pandas as pd
def cleantext():
    txt = open("textbook.txt","r").read()
    txt = ①    #将字符串中所有大写字母转为小写
    for ch in '! ( );:''',.? ':
        txt = txt.replace(ch," ")    #用空格替代ch的值
    return txt
booktxt = cleantext()
words = booktxt.split()
#以空格为分隔符分割文本并生成列表
counts = {}
for word in words:
    counts[word] = counts.get(word,0)+1
items = list(counts.items())    #返回所有键值对信息,生成列表
df = pd.DataFrame(items,columns=['word','times'])
df1 = df.sort_values('word')
df1.plot(x='word', y='times', kind='line', figsize=(8,3))
df2 = ②
print('文件中出现的不同单词数:', ③ )
print(df2[:10])
① ② ③
| 掌握程度 | 程序末尾须添加的输出语句 |
| --- | --- |
| 非常熟练(出现10次以上) | |
| 熟练(出现2~10次) | |
| 一般(出现1次) | |
①分词 ②特征提取 ③数据分析 ④结果呈现
word = ["yellow", "accent", "call", "excel", "tea", "little", "brother"] #存储结点的数据区域
turn = [4,-1,6,2,5,3,1] #存储结点的指针区域
del_word = input("请输入要删除的单词:")
head = 0 #头指针为head
pre_point = -1
while point != -1:
    if :
        point = turn[point]
        break    #break退出当前循环
    pre_point = point
    point = turn[point]
if pre_point == -1:    #删除头节点
    head = point
elif point == -1:    #删除尾节点
    turn[pre_point] = -1
else:
    turn[pre_point] = point
point = head
print("删除单词后词典为:")
while point != -1:
    print(word[point],end=" ")
print('\n')
import pandas as pd
import os,jieba,re,random,wordcloud
import matplotlib.pyplot as plt
from PIL import Image
wzdir = "./2021 浙江高考满分作文/"
wz = os.listdir(wzdir) #获得文件夹中所有文件的名称列表
wzrd = ①
f=open(wzdir+wzrd[0],encoding="utf-8")
dd=f.read()
f.close()
#使用正则表达式去除文章中的标点符号
ss = re.sub("[、,。:“”;?\n]","",dd)
wb = jieba.lcut(ss,cut_all=True)
word = {}
for i in wb:
    t = i.strip()
    if len(t)>1:
        if t in word:
            word[t]+=1
        else:
            ②
wc = wordcloud.WordCloud(font_path="msyh.ttc", width=800, height=600)
wc.background_color="white"
wc.fit_words(word)
img = wc.to_array()
plt.rcParams['font.sans-serif']=['SimHei']    #支持中文显示
plt.figure()
plt.imshow(img)
plt.axis(False)
plt.title(wzrd[0].split(".")[0])
③
②③