Preprocessing for Machine Learning in Python
James Chapman
Curriculum Manager, DataCamp
print(tfidf_vec.vocabulary_)
{'200': 0,
'204th': 1,
'33rd': 2,
'ahead': 3,
'alley': 4,
...
print(text_tfidf[3].data)
[0.19392702 0.20261085 ...]
print(text_tfidf[3].indices)
[ 31 102 20 70 5 ...]
# Invert the fitted vocabulary (token -> index) into an index -> token lookup.
vocab = {v:k for k,v in
tfidf_vec.vocabulary_.items()}
print(vocab)
{0: '200',
1: '204th',
2: '33rd',
3: 'ahead',
4: 'alley',
...
# Pair each index stored for row 3 with its value, giving an index -> weight dict.
zipped_row = dict(zip(text_tfidf[3].indices,
text_tfidf[3].data))
print(zipped_row)
{5: 0.1597882543332701,
7: 0.26576432098763175,
8: 0.18599931331925676,
9: 0.26576432098763175,
10: 0.13077355258450366,
...
def return_weights(vocab, vector, vector_index):
    """Map each token present in one row of ``vector`` to its stored weight.

    Args:
        vocab: Mapping from integer index to token (the inverse of a
            token -> index vocabulary).
        vector: Row-indexable collection; ``vector[vector_index]`` must
            expose parallel ``indices`` and ``data`` sequences.
        vector_index: Position of the row to inspect.

    Returns:
        dict: ``{token: weight}`` for every index stored in the row.

    Raises:
        KeyError: If the row contains an index that is missing from ``vocab``.
    """
    # Fetch the row once instead of re-indexing ``vector`` for every access.
    row = vector[vector_index]
    # ``indices`` and ``data`` are parallel, so one pass pairs token and weight.
    return {vocab[i]: weight for i, weight in zip(row.indices, row.data)}
print(return_weights(vocab, text_tfidf, 3))
{'and': 0.1597882543332701,
'are': 0.26576432098763175,
'at': 0.18599931331925676,
...
Preprocessing for Machine Learning in Python