import pandas as pd
# Load the clothes-rating review dataset; columns seen below are
# ProductID, UserID, Rating (1-5) and the free-text review Text.
df=pd.read_csv(r'D:\BB_internship_Tasks\cloths-rating.csv')
df
| ProductID | UserID | Rating | Text | |
|---|---|---|---|---|
| 0 | 777 | AV1YnR7wglJLPUi8IJmi | 4 | Great taffy at a great price. |
| 1 | 767 | AVpfpK8KLJeJML43BCuD | 4 | Absolutely wonderful - silky and sexy and comf... |
| 2 | 1080 | AVqkIdntQMlgsOJE6fuB | 5 | Love this dress! it's sooo pretty. |
| 3 | 1077 | AVpfpK8KLJeJML43BCuD | 3 | I had such high hopes for this dress and reall... |
| 4 | 1049 | AVpfpK8KLJeJML43BCuD | 5 | I love, love, love this jumpsuit. it's fun, fl... |
| ... | ... | ... | ... | ... |
| 629 | 823 | B08GWV3SM6 | 1 | I placed order 4+1 soaps.But I have received w... |
| 630 | 823 | B08GWV3SM6 | 3 | The soap is ok for bathing, no scent at all, m... |
| 631 | 847 | B08GWV3SM6 | 5 | For a long time I was searching for Indian soa... |
| 632 | 910 | AVph0EeEilAPnD_x9myq | 3 | Good but not great |
| 633 | 333 | AVqkIdntQMlgsOJE6fuB | 5 | Quick,easy to make & tasty too. |
634 rows × 4 columns
import numpy as np
# Notebook shell magic: make sure TextBlob is installed before importing it.
!pip install textblob
from textblob import TextBlob
Requirement already satisfied: textblob in c:\anaconda\lib\site-packages (0.17.1) Requirement already satisfied: nltk>=3.1 in c:\anaconda\lib\site-packages (from textblob) (3.7) Requirement already satisfied: tqdm in c:\anaconda\lib\site-packages (from nltk>=3.1->textblob) (4.64.0) Requirement already satisfied: regex>=2021.8.3 in c:\anaconda\lib\site-packages (from nltk>=3.1->textblob) (2022.3.15) Requirement already satisfied: click in c:\anaconda\lib\site-packages (from nltk>=3.1->textblob) (8.0.4) Requirement already satisfied: joblib in c:\anaconda\lib\site-packages (from nltk>=3.1->textblob) (1.1.0) Requirement already satisfied: colorama in c:\anaconda\lib\site-packages (from click->nltk>=3.1->textblob) (0.4.4)
def sentiment(text):
    """Return the TextBlob sentiment polarity of *text* in [-1.0, 1.0].

    The value is coerced to str first so non-string cells (NaN, numbers)
    do not crash TextBlob; returns None if analysis still fails.
    """
    try:
        return TextBlob(str(text)).sentiment.polarity
    except Exception:  # was a bare except: — keep KeyboardInterrupt/SystemExit working
        return None
# Polarity score in [-1, 1] for every review text.
df['sentiment'] = df['Text'].apply(sentiment)
df
| ProductID | UserID | Rating | Text | sentiment | |
|---|---|---|---|---|---|
| 0 | 777 | AV1YnR7wglJLPUi8IJmi | 4 | Great taffy at a great price. | 0.800000 |
| 1 | 767 | AVpfpK8KLJeJML43BCuD | 4 | Absolutely wonderful - silky and sexy and comf... | 0.633333 |
| 2 | 1080 | AVqkIdntQMlgsOJE6fuB | 5 | Love this dress! it's sooo pretty. | 0.437500 |
| 3 | 1077 | AVpfpK8KLJeJML43BCuD | 3 | I had such high hopes for this dress and reall... | 0.120000 |
| 4 | 1049 | AVpfpK8KLJeJML43BCuD | 5 | I love, love, love this jumpsuit. it's fun, fl... | 0.550000 |
| ... | ... | ... | ... | ... | ... |
| 629 | 823 | B08GWV3SM6 | 1 | I placed order 4+1 soaps.But I have received w... | 0.000000 |
| 630 | 823 | B08GWV3SM6 | 3 | The soap is ok for bathing, no scent at all, m... | 0.325000 |
| 631 | 847 | B08GWV3SM6 | 5 | For a long time I was searching for Indian soa... | -0.025000 |
| 632 | 910 | AVph0EeEilAPnD_x9myq | 3 | Good but not great | 0.150000 |
| 633 | 333 | AVqkIdntQMlgsOJE6fuB | 5 | Quick,easy to make & tasty too. | 0.000000 |
634 rows × 5 columns
# Weight the explicit star rating by the text polarity; resulting range
# is [-5, 5] (negative when polarity is negative).
df['Updated_score'] = df['Rating']*df['sentiment']
df
| ProductID | UserID | Rating | Text | sentiment | Updated_score | |
|---|---|---|---|---|---|---|
| 0 | 777 | AV1YnR7wglJLPUi8IJmi | 4 | Great taffy at a great price. | 0.800000 | 3.200000 |
| 1 | 767 | AVpfpK8KLJeJML43BCuD | 4 | Absolutely wonderful - silky and sexy and comf... | 0.633333 | 2.533333 |
| 2 | 1080 | AVqkIdntQMlgsOJE6fuB | 5 | Love this dress! it's sooo pretty. | 0.437500 | 2.187500 |
| 3 | 1077 | AVpfpK8KLJeJML43BCuD | 3 | I had such high hopes for this dress and reall... | 0.120000 | 0.360000 |
| 4 | 1049 | AVpfpK8KLJeJML43BCuD | 5 | I love, love, love this jumpsuit. it's fun, fl... | 0.550000 | 2.750000 |
| ... | ... | ... | ... | ... | ... | ... |
| 629 | 823 | B08GWV3SM6 | 1 | I placed order 4+1 soaps.But I have received w... | 0.000000 | 0.000000 |
| 630 | 823 | B08GWV3SM6 | 3 | The soap is ok for bathing, no scent at all, m... | 0.325000 | 0.975000 |
| 631 | 847 | B08GWV3SM6 | 5 | For a long time I was searching for Indian soa... | -0.025000 | -0.125000 |
| 632 | 910 | AVph0EeEilAPnD_x9myq | 3 | Good but not great | 0.150000 | 0.450000 |
| 633 | 333 | AVqkIdntQMlgsOJE6fuB | 5 | Quick,easy to make & tasty too. | 0.000000 | 0.000000 |
634 rows × 6 columns
# Enumerate every possible Updated_score value: polarity bucket edges
# crossed with the 1-5 rating scale (45 products, 27 distinct values).
b = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1]
l = [i * j for i in b for j in range(1, 6)]
print(l)
len(l)
[-1, -2, -3, -4, -5, -0.75, -1.5, -2.25, -3.0, -3.75, -0.5, -1.0, -1.5, -2.0, -2.5, -0.25, -0.5, -0.75, -1.0, -1.25, 0, 0, 0, 0, 0, 0.25, 0.5, 0.75, 1.0, 1.25, 0.5, 1.0, 1.5, 2.0, 2.5, 0.75, 1.5, 2.25, 3.0, 3.75, 1, 2, 3, 4, 5]
45
len(set(l))
27
def equalizer(v):
    """Bucket a weighted score (Rating * polarity) into a 1-5 star scale.

    Non-positive scores collapse to 1; positive scores are binned at
    2, 3, 4 and 5. Scores above 5 cannot occur for ratings in 1-5 with
    polarity in [-1, 1], but are clamped to 5 anyway — the original
    version had no final branch and silently returned None there.
    """
    if v <= 0:
        return 1
    elif v <= 2:
        return 2
    elif v <= 3:
        return 3
    elif v <= 4:
        return 4
    else:
        return 5
# Collapse the weighted score back onto a discrete 1-5 scale.
df['New_score'] = df['Updated_score'].apply(equalizer)
df
| ProductID | UserID | Rating | Text | sentiment | Updated_score | New_score | |
|---|---|---|---|---|---|---|---|
| 0 | 777 | AV1YnR7wglJLPUi8IJmi | 4 | Great taffy at a great price. | 0.800000 | 3.200000 | 4 |
| 1 | 767 | AVpfpK8KLJeJML43BCuD | 4 | Absolutely wonderful - silky and sexy and comf... | 0.633333 | 2.533333 | 3 |
| 2 | 1080 | AVqkIdntQMlgsOJE6fuB | 5 | Love this dress! it's sooo pretty. | 0.437500 | 2.187500 | 3 |
| 3 | 1077 | AVpfpK8KLJeJML43BCuD | 3 | I had such high hopes for this dress and reall... | 0.120000 | 0.360000 | 2 |
| 4 | 1049 | AVpfpK8KLJeJML43BCuD | 5 | I love, love, love this jumpsuit. it's fun, fl... | 0.550000 | 2.750000 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 629 | 823 | B08GWV3SM6 | 1 | I placed order 4+1 soaps.But I have received w... | 0.000000 | 0.000000 | 1 |
| 630 | 823 | B08GWV3SM6 | 3 | The soap is ok for bathing, no scent at all, m... | 0.325000 | 0.975000 | 2 |
| 631 | 847 | B08GWV3SM6 | 5 | For a long time I was searching for Indian soa... | -0.025000 | -0.125000 | 1 |
| 632 | 910 | AVph0EeEilAPnD_x9myq | 3 | Good but not great | 0.150000 | 0.450000 | 2 |
| 633 | 333 | AVqkIdntQMlgsOJE6fuB | 5 | Quick,easy to make & tasty too. | 0.000000 | 0.000000 | 1 |
634 rows × 7 columns
Label-encode the UserID and ProductID columns
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# Replace the alphanumeric user ids with dense integer codes (0..n_users-1),
# as required for the pivot/matrix steps below.
df['UserID'] = le.fit_transform(df['UserID'])
df
| ProductID | UserID | Rating | Text | sentiment | Updated_score | New_score | |
|---|---|---|---|---|---|---|---|
| 0 | 777 | 0 | 4 | Great taffy at a great price. | 0.800000 | 3.200000 | 4 |
| 1 | 767 | 3 | 4 | Absolutely wonderful - silky and sexy and comf... | 0.633333 | 2.533333 | 3 |
| 2 | 1080 | 13 | 5 | Love this dress! it's sooo pretty. | 0.437500 | 2.187500 | 3 |
| 3 | 1077 | 3 | 3 | I had such high hopes for this dress and reall... | 0.120000 | 0.360000 | 2 |
| 4 | 1049 | 3 | 5 | I love, love, love this jumpsuit. it's fun, fl... | 0.550000 | 2.750000 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 629 | 823 | 41 | 1 | I placed order 4+1 soaps.But I have received w... | 0.000000 | 0.000000 | 1 |
| 630 | 823 | 41 | 3 | The soap is ok for bathing, no scent at all, m... | 0.325000 | 0.975000 | 2 |
| 631 | 847 | 41 | 5 | For a long time I was searching for Indian soa... | -0.025000 | -0.125000 | 1 |
| 632 | 910 | 7 | 3 | Good but not great | 0.150000 | 0.450000 | 2 |
| 633 | 333 | 13 | 5 | Quick,easy to make & tasty too. | 0.000000 | 0.000000 | 1 |
634 rows × 7 columns
#from sklearn.preprocessing import LabelEncoder
#le = LabelEncoder()
#df['ProductID'] = le.fit_transform(df['ProductID'])
#df
#We assign a product name to each product ID, where the name mapping is given in the JSON file pname.json
#le = LabelEncoder()
#df['ProductNAME'] = le.fit_transform(df['ProductID'])
#df
# Product x user utility matrix of New_score. pivot_table's default
# aggfunc averages duplicate (product, user) pairs; unrated cells become 0.
df_pivot = df.pivot_table(index='ProductID', columns='UserID', values='New_score').fillna(0)
df_pivot
| UserID | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ProductID | |||||||||||||||||||||
| 89 | 0.0 | 0.000000 | 0.000000 | 2.0 | 0.0 | 0.0 | 5.00 | 1.000000 | 2.0 | 0.0 | ... | 0.000000 | 2.00 | 0.0 | 0.0 | 0.000000 | 3.000000 | 0.000000 | 0.000000 | 0.000000 | 3.00 |
| 333 | 0.0 | 2.000000 | 0.000000 | 0.0 | 0.0 | 3.0 | 0.00 | 0.000000 | 4.0 | 1.0 | ... | 0.000000 | 1.00 | 3.0 | 0.0 | 0.000000 | 1.750000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
| 369 | 4.0 | 1.000000 | 0.000000 | 2.0 | 0.0 | 3.0 | 4.00 | 2.000000 | 0.0 | 0.0 | ... | 0.000000 | 0.00 | 0.0 | 0.0 | 0.000000 | 0.000000 | 1.333333 | 0.000000 | 5.000000 | 0.00 |
| 444 | 2.0 | 2.000000 | 0.000000 | 0.0 | 1.0 | 0.0 | 3.00 | 5.000000 | 2.0 | 0.0 | ... | 2.000000 | 0.00 | 2.5 | 0.0 | 0.000000 | 1.666667 | 0.000000 | 0.000000 | 2.666667 | 0.00 |
| 684 | 0.0 | 0.000000 | 3.000000 | 3.0 | 2.0 | 0.0 | 1.00 | 2.000000 | 2.0 | 0.0 | ... | 2.500000 | 0.00 | 3.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.25 |
| 697 | 0.0 | 1.000000 | 2.000000 | 0.0 | 0.0 | 0.0 | 4.00 | 0.000000 | 1.0 | 2.0 | ... | 3.000000 | 2.00 | 3.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.50 |
| 767 | 2.0 | 0.000000 | 0.000000 | 3.0 | 0.0 | 0.0 | 2.00 | 0.000000 | 0.0 | 0.0 | ... | 0.000000 | 1.00 | 0.0 | 0.0 | 2.000000 | 0.000000 | 1.000000 | 1.000000 | 0.000000 | 0.00 |
| 777 | 4.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 1.00 | 0.000000 | 0.0 | 2.0 | ... | 0.000000 | 1.00 | 0.0 | 0.0 | 0.000000 | 0.000000 | 4.000000 | 0.000000 | 0.000000 | 0.00 |
| 823 | 0.0 | 0.000000 | 0.000000 | 4.0 | 0.0 | 0.0 | 5.00 | 1.000000 | 0.0 | 0.0 | ... | 0.000000 | 5.00 | 0.0 | 0.0 | 0.000000 | 4.000000 | 0.000000 | 0.000000 | 0.000000 | 1.50 |
| 847 | 0.0 | 0.000000 | 0.000000 | 2.0 | 0.0 | 0.0 | 2.00 | 2.333333 | 5.0 | 0.0 | ... | 0.000000 | 3.00 | 0.0 | 0.0 | 3.000000 | 0.000000 | 0.000000 | 2.000000 | 0.000000 | 1.00 |
| 853 | 0.0 | 0.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.00 | 0.000000 | 0.0 | 0.0 | ... | 0.000000 | 0.00 | 0.0 | 0.0 | 1.000000 | 0.000000 | 0.000000 | 2.000000 | 0.000000 | 0.00 |
| 858 | 0.0 | 0.000000 | 0.000000 | 0.0 | 2.5 | 0.0 | 3.00 | 2.000000 | 2.0 | 4.0 | ... | 0.000000 | 0.00 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 4.000000 | 0.000000 | 0.00 |
| 862 | 2.0 | 2.333333 | 0.000000 | 3.0 | 2.5 | 0.0 | 2.25 | 1.000000 | 2.5 | 0.0 | ... | 1.666667 | 2.75 | 5.0 | 0.0 | 0.000000 | 3.200000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
| 910 | 0.0 | 2.666667 | 0.000000 | 0.0 | 0.0 | 0.0 | 3.00 | 3.000000 | 3.5 | 0.0 | ... | 0.000000 | 3.00 | 4.0 | 0.0 | 0.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
| 949 | 2.0 | 2.000000 | 2.000000 | 0.0 | 0.0 | 0.0 | 1.00 | 0.000000 | 3.0 | 0.0 | ... | 2.000000 | 0.00 | 2.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.00 |
| 1002 | 4.0 | 4.000000 | 1.000000 | 0.0 | 0.0 | 0.0 | 1.00 | 0.000000 | 0.0 | 0.0 | ... | 1.000000 | 0.00 | 2.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
| 1003 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 1.00 | 0.000000 | 2.0 | 0.0 | ... | 2.000000 | 0.00 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
| 1049 | 2.0 | 3.000000 | 0.000000 | 3.0 | 0.0 | 0.0 | 0.00 | 0.000000 | 0.0 | 0.0 | ... | 0.000000 | 0.00 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
| 1060 | 1.5 | 2.000000 | 1.333333 | 0.0 | 2.0 | 0.0 | 0.00 | 2.000000 | 2.0 | 0.0 | ... | 2.250000 | 0.00 | 2.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
| 1065 | 0.0 | 0.000000 | 0.000000 | 1.0 | 0.0 | 0.0 | 0.00 | 0.000000 | 0.0 | 0.0 | ... | 0.000000 | 0.00 | 0.0 | 0.0 | 2.500000 | 0.000000 | 0.000000 | 3.000000 | 0.000000 | 0.00 |
| 1077 | 0.0 | 2.000000 | 0.000000 | 2.0 | 1.0 | 0.0 | 1.50 | 1.500000 | 1.0 | 2.0 | ... | 0.000000 | 0.00 | 0.0 | 0.0 | 1.333333 | 2.000000 | 0.000000 | 3.166667 | 0.000000 | 0.00 |
| 1080 | 0.0 | 2.000000 | 0.000000 | 0.0 | 1.0 | 0.0 | 2.00 | 1.000000 | 0.0 | 1.0 | ... | 0.000000 | 4.00 | 0.0 | 0.0 | 1.000000 | 0.000000 | 0.000000 | 4.000000 | 0.000000 | 0.00 |
| 1095 | 0.0 | 0.000000 | 0.000000 | 1.5 | 3.0 | 0.0 | 0.00 | 1.666667 | 0.0 | 2.0 | ... | 0.000000 | 0.00 | 0.0 | 4.0 | 0.000000 | 0.000000 | 1.500000 | 1.500000 | 3.500000 | 0.00 |
| 1120 | 0.0 | 0.000000 | 2.000000 | 0.0 | 0.0 | 0.0 | 0.00 | 0.000000 | 0.0 | 0.0 | ... | 0.000000 | 0.00 | 0.0 | 0.0 | 2.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.00 |
| 6969 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00 | 0.000000 | 0.0 | 3.0 | ... | 0.000000 | 3.00 | 0.0 | 0.0 | 0.000000 | 3.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
| 8001 | 0.0 | 2.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.00 | 0.000000 | 0.0 | 3.5 | ... | 0.000000 | 0.00 | 0.0 | 0.0 | 0.000000 | 0.000000 | 2.000000 | 0.000000 | 1.000000 | 0.00 |
| 9696 | 1.0 | 2.000000 | 0.000000 | 2.0 | 0.0 | 0.0 | 1.00 | 2.500000 | 1.0 | 1.0 | ... | 0.000000 | 2.75 | 0.0 | 0.0 | 0.000000 | 1.500000 | 0.000000 | 0.000000 | 0.000000 | 0.00 |
27 rows × 42 columns
from scipy.sparse import csr_matrix
# Sparse (CSR) copy of the utility matrix, fed to the k-NN model below.
df_pivot_matrix = csr_matrix(df_pivot.values)
print(df_pivot_matrix)
(0, 3) 2.0 (0, 6) 5.0 (0, 7) 1.0 (0, 8) 2.0 (0, 11) 3.0 (0, 16) 2.5 (0, 18) 3.0 (0, 20) 1.0 (0, 22) 4.0 (0, 28) 3.0 (0, 33) 2.0 (0, 37) 3.0 (0, 41) 3.0 (1, 1) 2.0 (1, 5) 3.0 (1, 8) 4.0 (1, 9) 1.0 (1, 10) 2.0 (1, 13) 2.0 (1, 17) 1.25 (1, 22) 4.0 (1, 25) 1.25 (1, 29) 2.5 (1, 33) 1.0 (1, 34) 3.0 : : (25, 17) 4.0 (25, 21) 2.0 (25, 22) 1.0 (25, 25) 2.0 (25, 30) 3.3333333333333335 (25, 38) 2.0 (25, 40) 1.0 (26, 0) 1.0 (26, 1) 2.0 (26, 3) 2.0 (26, 6) 1.0 (26, 7) 2.5 (26, 8) 1.0 (26, 9) 1.0 (26, 11) 2.0 (26, 16) 2.0 (26, 17) 1.6 (26, 18) 2.3333333333333335 (26, 19) 1.0 (26, 22) 1.0 (26, 23) 2.0 (26, 25) 3.3333333333333335 (26, 28) 2.0 (26, 33) 2.75 (26, 37) 1.5
from textblob import TextBlob  # NOTE(review): duplicate import — TextBlob was already imported above
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise product-product cosine similarity over the utility matrix.
similarity_matrix = cosine_similarity(df_pivot)
similarity_matrix
array([[1. , 0.34676265, 0.33723697, 0.38107647, 0.49477232,
0.4627069 , 0.26858532, 0.26651864, 0.77150131, 0.31717595,
0.12426828, 0.32679076, 0.48985768, 0.43446939, 0.24805411,
0.07312724, 0.26360285, 0.0849059 , 0.17477676, 0.07714055,
0.36102125, 0.32580206, 0.15758538, 0. , 0.48787147,
0.14209634, 0.6188595 ],
[0.34676265, 1. , 0.38234126, 0.27725706, 0.24058911,
0.44108693, 0.17812137, 0.04713354, 0.26572428, 0.32450053,
0.10424852, 0.18953488, 0.45902114, 0.46497647, 0.40290215,
0.24140622, 0.16303523, 0.10010346, 0.36632991, 0. ,
0.21012683, 0.16645129, 0.16442598, 0. , 0.40356568,
0.29916176, 0.34769634],
[0.33723697, 0.38234126, 1. , 0.41102231, 0.15576679,
0.35196052, 0.52867562, 0.41047143, 0.36782369, 0.34109431,
0.13971053, 0.20874106, 0.28451058, 0.23410013, 0.22394512,
0.33097939, 0.06519597, 0.33803328, 0.27467108, 0.02078238,
0.33898223, 0.18037318, 0.48589697, 0. , 0.11129753,
0.34826045, 0.30593496],
[0.38107647, 0.27725706, 0.41102231, 1. , 0.62813508,
0.51757807, 0.32052195, 0.34077389, 0.33004843, 0.41336535,
0.42167198, 0.38264425, 0.55974449, 0.55331328, 0.607712 ,
0.41140598, 0.25048207, 0.14474891, 0.46625563, 0. ,
0.52756832, 0.45944987, 0.3116387 , 0. , 0.11961723,
0.27108714, 0.49497036],
[0.49477232, 0.24058911, 0.15576679, 0.62813508, 1. ,
0.56489663, 0.21779944, 0.20179174, 0.31660479, 0.31055744,
0.39313159, 0.34284119, 0.52909726, 0.4205376 , 0.45651157,
0.24160368, 0.34800801, 0.13149326, 0.43943034, 0.0568891 ,
0.35229527, 0.25257893, 0.13504185, 0.11786218, 0.0812434 ,
0.05239613, 0.42389844],
[0.4627069 , 0.44108693, 0.35196052, 0.51757807, 0.56489663,
1. , 0.3374719 , 0.20944919, 0.54894968, 0.50589621,
0.54157829, 0.34678732, 0.50189295, 0.49485643, 0.59931599,
0.33679247, 0.43595531, 0.04655236, 0.3927721 , 0.16917906,
0.36581576, 0.39361578, 0.16589032, 0.20863287, 0.29337732,
0.21146687, 0.28769927],
[0.26858532, 0.17812137, 0.52867562, 0.32052195, 0.21779944,
0.3374719 , 1. , 0.42306669, 0.42672391, 0.64658897,
0.44328343, 0.36475482, 0.31119524, 0.21586737, 0.25855073,
0.1721648 , 0.07399165, 0.68146193, 0.15586368, 0.38327503,
0.66607827, 0.42450317, 0.48568076, 0.40721272, 0.08420856,
0.2425777 , 0.26683661],
[0.26651864, 0.04713354, 0.41047143, 0.34077389, 0.20179174,
0.20944919, 0.42306669, 1. , 0.23319906, 0.20041728,
0.10827631, 0.2935387 , 0.21051075, 0.06590964, 0.19927088,
0.35203403, 0.0244741 , 0.28050536, 0.15122737, 0.03120622,
0.33603027, 0.35123294, 0.34137671, 0. , 0.20054543,
0.31136764, 0.34737312],
[0.77150131, 0.26572428, 0.36782369, 0.33004843, 0.31660479,
0.54894968, 0.42672391, 0.23319906, 1. , 0.45509476,
0.27293185, 0.26214854, 0.5268653 , 0.39301529, 0.20092067,
0.06735267, 0.2308468 , 0.15640241, 0.05962049, 0.16239785,
0.39003611, 0.39483518, 0.2365806 , 0.07009458, 0.5073263 ,
0.05608952, 0.54508806],
[0.31717595, 0.32450053, 0.34109431, 0.41336535, 0.31055744,
0.50589621, 0.64658897, 0.20041728, 0.45509476, 1. ,
0.63097054, 0.38461231, 0.36049923, 0.39218556, 0.43803484,
0.04888034, 0.29968391, 0.36653324, 0.31009315, 0.53634678,
0.67622201, 0.53297672, 0.34672634, 0.48485711, 0.11834538,
0.21201175, 0.29200755],
[0.12426828, 0.10424852, 0.13971053, 0.42167198, 0.39313159,
0.54157829, 0.44328343, 0.10827631, 0.27293185, 0.63097054,
1. , 0.37473303, 0.27758417, 0.40975132, 0.43061606,
0.10398078, 0.43890054, 0.17247006, 0.13587395, 0.57082201,
0.55913724, 0.54918159, 0.16805486, 0.56039301, 0.01598415,
0.13401211, 0.12123435],
[0.32679076, 0.18953488, 0.20874106, 0.38264425, 0.34284119,
0.34678732, 0.36475482, 0.2935387 , 0.26214854, 0.38461231,
0.37473303, 1. , 0.3363193 , 0.35628047, 0.18325139,
0.09828731, 0.2516869 , 0.22189711, 0.24651074, 0.28388963,
0.5918123 , 0.51856238, 0.46243876, 0.21310118, 0.22915223,
0.20462767, 0.3983947 ],
[0.48985768, 0.45902114, 0.28451058, 0.55974449, 0.52909726,
0.50189295, 0.31119524, 0.21051075, 0.5268653 , 0.36049923,
0.27758417, 0.3363193 , 1. , 0.79777694, 0.59505411,
0.4518857 , 0.44505984, 0.28271758, 0.57344589, 0.19456559,
0.458129 , 0.43234569, 0.19036497, 0.08839885, 0.37699043,
0.24207619, 0.56021353],
[0.43446939, 0.46497647, 0.23410013, 0.55331328, 0.4205376 ,
0.49485643, 0.21586737, 0.06590964, 0.39301529, 0.39218556,
0.40975132, 0.35628047, 0.79777694, 1. , 0.6308363 ,
0.32149781, 0.5272081 , 0.09332042, 0.53716251, 0.16351488,
0.41563538, 0.48059499, 0.1013236 , 0.06273486, 0.38919306,
0.41275791, 0.55455379],
[0.24805411, 0.40290215, 0.22394512, 0.607712 , 0.45651157,
0.59931599, 0.25855073, 0.19927088, 0.20092067, 0.43803484,
0.43061606, 0.18325139, 0.59505411, 0.6308363 , 1. ,
0.49208254, 0.46849452, 0.15870628, 0.61950688, 0.16479003,
0.41651019, 0.50257693, 0.10604121, 0.17070504, 0.12061015,
0.4970641 , 0.26864098],
[0.07312724, 0.24140622, 0.33097939, 0.41140598, 0.24160368,
0.33679247, 0.1721648 , 0.35203403, 0.06735267, 0.04888034,
0.10398078, 0.09828731, 0.4518857 , 0.32149781, 0.49208254,
1. , 0.13430383, 0.4397995 , 0.43421673, 0. ,
0.25991114, 0.18119544, 0.0587713 , 0.05913124, 0. ,
0.22081116, 0.26142734],
[0.26360285, 0.16303523, 0.06519597, 0.25048207, 0.34800801,
0.43595531, 0.07399165, 0.0244741 , 0.2308468 , 0.29968391,
0.43890054, 0.2516869 , 0.44505984, 0.5272081 , 0.46849452,
0.13430383, 1. , 0. , 0.38043379, 0.45538256,
0.30774166, 0.35901639, 0. , 0.2445998 , 0.14451833,
0.18640778, 0.22069578],
[0.0849059 , 0.10010346, 0.33803328, 0.14474891, 0.13149326,
0.04655236, 0.68146193, 0.28050536, 0.15640241, 0.36653324,
0.17247006, 0.22189711, 0.28271758, 0.09332042, 0.15870628,
0.4397995 , 0. , 1. , 0.21412044, 0.34795214,
0.49196503, 0.25781951, 0.30564773, 0.34327774, 0. ,
0.19330039, 0.32521673],
[0.17477676, 0.36632991, 0.27467108, 0.46625563, 0.43943034,
0.3927721 , 0.15586368, 0.15122737, 0.05962049, 0.31009315,
0.13587395, 0.24651074, 0.57344589, 0.53716251, 0.61950688,
0.43421673, 0.38043379, 0.21412044, 1. , 0.09095253,
0.342595 , 0.33258158, 0.19942638, 0.06979052, 0.12988944,
0.34903879, 0.30579854],
[0.07714055, 0. , 0.02078238, 0. , 0.0568891 ,
0.16917906, 0.38327503, 0.03120622, 0.16239785, 0.53634678,
0.57082201, 0.28388963, 0.19456559, 0.16351488, 0.16479003,
0. , 0.45538256, 0.34795214, 0.09095253, 1. ,
0.54114495, 0.55012884, 0.17713353, 0.8131213 , 0.07370846,
0.09507327, 0.14070141],
[0.36102125, 0.21012683, 0.33898223, 0.52756832, 0.35229527,
0.36581576, 0.66607827, 0.33603027, 0.39003611, 0.67622201,
0.55913724, 0.5918123 , 0.458129 , 0.41563538, 0.41651019,
0.25991114, 0.30774166, 0.49196503, 0.342595 , 0.54114495,
1. , 0.70826251, 0.55259636, 0.41694702, 0.24452784,
0.47836519, 0.43196319],
[0.32580206, 0.16645129, 0.18037318, 0.45944987, 0.25257893,
0.39361578, 0.42450317, 0.35123294, 0.39483518, 0.53297672,
0.54918159, 0.51856238, 0.43234569, 0.48059499, 0.50257693,
0.18119544, 0.35901639, 0.25781951, 0.33258158, 0.55012884,
0.70826251, 1. , 0.39620886, 0.38823622, 0.27526065,
0.42654893, 0.48992653],
[0.15758538, 0.16442598, 0.48589697, 0.3116387 , 0.13504185,
0.16589032, 0.48568076, 0.34137671, 0.2365806 , 0.34672634,
0.16805486, 0.46243876, 0.19036497, 0.1013236 , 0.10604121,
0.0587713 , 0. , 0.30564773, 0.19942638, 0.17713353,
0.55259636, 0.39620886, 1. , 0.14335282, 0.14229261,
0.42315459, 0.36417344],
[0. , 0. , 0. , 0. , 0.11786218,
0.20863287, 0.40721272, 0. , 0.07009458, 0.48485711,
0.56039301, 0.21310118, 0.08839885, 0.06273486, 0.17070504,
0.05913124, 0.2445998 , 0.34327774, 0.06979052, 0.8131213 ,
0.41694702, 0.38823622, 0.14335282, 1. , 0. ,
0. , 0. ],
[0.48787147, 0.40356568, 0.11129753, 0.11961723, 0.0812434 ,
0.29337732, 0.08420856, 0.20054543, 0.5073263 , 0.11834538,
0.01598415, 0.22915223, 0.37699043, 0.38919306, 0.12061015,
0. , 0.14451833, 0. , 0.12988944, 0.07370846,
0.24452784, 0.27526065, 0.14229261, 0. , 1. ,
0.4497523 , 0.57635109],
[0.14209634, 0.29916176, 0.34826045, 0.27108714, 0.05239613,
0.21146687, 0.2425777 , 0.31136764, 0.05608952, 0.21201175,
0.13401211, 0.20462767, 0.24207619, 0.41275791, 0.4970641 ,
0.22081116, 0.18640778, 0.19330039, 0.34903879, 0.09507327,
0.47836519, 0.42654893, 0.42315459, 0. , 0.4497523 ,
1. , 0.3831519 ],
[0.6188595 , 0.34769634, 0.30593496, 0.49497036, 0.42389844,
0.28769927, 0.26683661, 0.34737312, 0.54508806, 0.29200755,
0.12123435, 0.3983947 , 0.56021353, 0.55455379, 0.26864098,
0.26142734, 0.22069578, 0.32521673, 0.30579854, 0.14070141,
0.43196319, 0.48992653, 0.36417344, 0. , 0.57635109,
0.3831519 , 1. ]])
from sklearn.neighbors import NearestNeighbors
# Cosine-distance k-NN over products (rows of the sparse pivot matrix).
model_knn = NearestNeighbors(metric='cosine', n_neighbors=20, radius=1)
model_knn.fit(df_pivot_matrix)
NearestNeighbors(metric='cosine', n_neighbors=20, radius=1)
import json

# Contraction map (e.g. "don't" -> "do not") consumed by process_tweet.
# Raw string avoids invalid-escape warnings in the Windows path, and an
# explicit encoding makes the JSON read deterministic across machines.
with open(r'D:\BB_internship_Tasks\contractions.json', 'r', encoding='utf-8') as f:
    contractions_dict = json.load(f)
contractions = contractions_dict  # alias used by the text-cleaning code below
def emoji(tweet):
    """Replace common ASCII emoticons with sentiment placeholder tokens.

    Positive faces become ' positiveemoji ' and negative faces become
    ' negetiveemoji ' (spelling kept as-is: downstream code matches
    this exact token).
    """
    positive_patterns = (
        r'(:\s?\)|:-\)|\(\s?:|\(-:|:\'\)|:O)',   # smile -- :), : ), :-), (:, ( :, (-:, :') , :O
        r'(:\s?D|:-D|x-?D|X-?D)',                # laugh -- :D, : D, :-D, xD, x-D, XD, X-D
        r'(<3|:\*)',                             # love  -- <3, :*
        r'(;-?\)|;-?D|\(-?;|@-\))',              # wink  -- ;-), ;), ;-D, ;D, (;, (-;, @-)
    )
    negative_patterns = (
        r'(:\s?\(|:-\(|\)\s?:|\)-:|:-/|:-\|)',   # sad -- :-(, : (, :(, ):, )-:, :-/, :-|
        r'(:,\(|:\'\(|:"\()',                    # cry -- :,(, :'(, :"(
    )
    for pattern in positive_patterns:
        tweet = re.sub(pattern, ' positiveemoji ', tweet)
    for pattern in negative_patterns:
        tweet = re.sub(pattern, ' negetiveemoji ', tweet)
    return tweet
import re
def process_tweet(tweet):
    """Normalise a raw review/tweet string for text analysis.

    Pipeline (order matters): lower-case, strip a leading @mention,
    remove urls, remove digits and double quotes, map emoticons to
    sentiment tokens, drop stray single letters, expand contractions,
    strip remaining punctuation, squeeze 3+ repeated characters down to
    two, and collapse whitespace.
    """
    tweet = tweet.lower()
    # Raw strings below fix the invalid-escape DeprecationWarnings the
    # original plain-string patterns ('^@[^\s]+', 'www\.') produced.
    tweet = re.sub(r'^@[^\s]+', '', tweet)                            # leading @mention
    tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', ' ', tweet)  # urls
    tweet = re.sub(r"\d+", " ", tweet)                                # digits
    tweet = re.sub('"', " ", str(tweet))                              # double quotes
    tweet = emoji(tweet)                                              # emoticons -> sentiment tokens
    tweet = re.sub(r"\b[a-zA-Z]\b", "", str(tweet))                   # stray single letters
    # Expand contractions token-by-token. The original used str.replace,
    # which also rewrote matching substrings inside unrelated words.
    # (Joining on a single space also trims edge whitespace that
    # previously survived as one leading/trailing space.)
    tweet = " ".join(contractions.get(word.lower(), word) for word in tweet.split())
    tweet = re.sub(r"[^\w\s]", " ", str(tweet))                       # punctuation
    tweet = re.sub(r'(.)\1+', r'\1\1', tweet)                         # "sooo" -> "soo"
    tweet = re.sub(r"\s+", " ", str(tweet))                           # collapse whitespace
    return tweet
from nltk.corpus import stopwords
# English stop-word list. NOTE(review): `stop` is never referenced in the
# code visible here — confirm it is needed before removing.
stop = stopwords.words('english')
def sentiments_2(text):
    """Return the TextBlob sentiment polarity of *text*, or None on failure.

    NOTE(review): this is an exact duplicate of sentiment() defined
    earlier; kept because later cells call it by this name.
    """
    try:
        return TextBlob(str(text)).sentiment.polarity
    except Exception:  # was a bare except: — keep KeyboardInterrupt/SystemExit working
        return None
# NOTE(review): recomputes the same polarity already stored in 'sentiment'.
df['sentiment_2'] = df['Text'].apply(sentiments_2)
df
| ProductID | UserID | Rating | Text | sentiment | Updated_score | New_score | sentiment_2 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 777 | 0 | 4 | Great taffy at a great price. | 0.800000 | 3.200000 | 4 | 0.800000 |
| 1 | 767 | 3 | 4 | Absolutely wonderful - silky and sexy and comf... | 0.633333 | 2.533333 | 3 | 0.633333 |
| 2 | 1080 | 13 | 5 | Love this dress! it's sooo pretty. | 0.437500 | 2.187500 | 3 | 0.437500 |
| 3 | 1077 | 3 | 3 | I had such high hopes for this dress and reall... | 0.120000 | 0.360000 | 2 | 0.120000 |
| 4 | 1049 | 3 | 5 | I love, love, love this jumpsuit. it's fun, fl... | 0.550000 | 2.750000 | 3 | 0.550000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 629 | 823 | 41 | 1 | I placed order 4+1 soaps.But I have received w... | 0.000000 | 0.000000 | 1 | 0.000000 |
| 630 | 823 | 41 | 3 | The soap is ok for bathing, no scent at all, m... | 0.325000 | 0.975000 | 2 | 0.325000 |
| 631 | 847 | 41 | 5 | For a long time I was searching for Indian soa... | -0.025000 | -0.125000 | 1 | -0.025000 |
| 632 | 910 | 7 | 3 | Good but not great | 0.150000 | 0.450000 | 2 | 0.150000 |
| 633 | 333 | 13 | 5 | Quick,easy to make & tasty too. | 0.000000 | 0.000000 | 1 | 0.000000 |
634 rows × 8 columns
# Cleaned, lower-cased review text (urls, digits, emoticons handled).
df['processed_text'] = df['Text'].apply(process_tweet)
#df['processed_text'] = df['processed_text'].apply
df
| ProductID | UserID | Rating | Text | sentiment | Updated_score | New_score | sentiment_2 | processed_text | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 777 | 0 | 4 | Great taffy at a great price. | 0.800000 | 3.200000 | 4 | 0.800000 | great taffy at great price |
| 1 | 767 | 3 | 4 | Absolutely wonderful - silky and sexy and comf... | 0.633333 | 2.533333 | 3 | 0.633333 | absolutely wonderful silky and sexy and comfor... |
| 2 | 1080 | 13 | 5 | Love this dress! it's sooo pretty. | 0.437500 | 2.187500 | 3 | 0.437500 | love this dress it soo pretty |
| 3 | 1077 | 3 | 3 | I had such high hopes for this dress and reall... | 0.120000 | 0.360000 | 2 | 0.120000 | had such high hopes for this dress and really... |
| 4 | 1049 | 3 | 5 | I love, love, love this jumpsuit. it's fun, fl... | 0.550000 | 2.750000 | 3 | 0.550000 | love love love this jumpsuit it fun flirty an... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 629 | 823 | 41 | 1 | I placed order 4+1 soaps.But I have received w... | 0.000000 | 0.000000 | 1 | 0.000000 | placed order soaps but have received without ... |
| 630 | 823 | 41 | 3 | The soap is ok for bathing, no scent at all, m... | 0.325000 | 0.975000 | 2 | 0.325000 | the soap is ok for bathing no scent at all mor... |
| 631 | 847 | 41 | 5 | For a long time I was searching for Indian soa... | -0.025000 | -0.125000 | 1 | -0.025000 | for long time was searching for indian soap eq... |
| 632 | 910 | 7 | 3 | Good but not great | 0.150000 | 0.450000 | 2 | 0.150000 | good but not great |
| 633 | 333 | 13 | 5 | Quick,easy to make & tasty too. | 0.000000 | 0.000000 | 1 | 0.000000 | quick easy to make tasty too |
634 rows × 9 columns
# Interactive lookup: read a product id and list all known ProductIDs
# (the pivot's row labels) so its row position can be found.
product_ID = int(input('Enter Product Id: '))
data = df_pivot.index.to_list()
data
Enter Product Id: 369
[89, 333, 369, 444, 684, 697, 767, 777, 823, 847, 853, 858, 862, 910, 949, 1002, 1003, 1049, 1060, 1065, 1077, 1080, 1095, 1120, 6969, 8001, 9696]
# Row position of the requested product; raises ValueError if unknown.
query_index = data.index(product_ID)
print(query_index)
2
# 8 nearest products by cosine distance; the closest hit (distance 0)
# is the queried product itself.
distance, indices = model_knn.kneighbors(df_pivot.iloc[query_index,:].values.reshape(1,-1), n_neighbors = 8)
print(distance)
print(indices)
[[0. 0.47132438 0.51410303 0.58897769 0.58952857 0.61765874 0.63217631 0.64803948]] [[ 2 6 22 3 7 1 8 5]]
# Map neighbour row positions back to their ProductID labels.
f=df_pivot.index[indices.flatten()]
f
Int64Index([369, 767, 1095, 444, 777, 333, 823, 697], dtype='int64', name='ProductID')
#import pandas as pd
#x=pd.DataFrame(df_pivot.index(f))
#x
# Presenting the recommendations in tabular format...
p_id = int(input('Enter product id: '))
data = df_pivot.index.to_list()
query_index = data.index(p_id)
distance, indices = model_knn.kneighbors(df_pivot.iloc[query_index,:].values.reshape(1,-1), n_neighbors = 8)
#print(distance)
#print(indices)
x=pd.DataFrame(df_pivot.index[indices.flatten()])
x.drop(0) #here the first row should not be displayed so (it is the queried product itself, distance 0)
Enter product id: 369
| ProductID | |
|---|---|
| 1 | 767 |
| 2 | 1095 |
| 3 | 444 |
| 4 | 777 |
| 5 | 333 |
| 6 | 823 |
| 7 | 697 |
# Sanity check: kneighbors returns numpy arrays.
print(type(distance))
<class 'numpy.ndarray'>
# Convert the result arrays to plain Python lists for inspection.
distance.tolist(), indices.tolist()
([[0.0, 0.4713243830572561, 0.5141030253286081, 0.5889776911252591, 0.5895285707784645, 0.6176587445860691, 0.6321763125811057, 0.6480394832572696]], [[2, 6, 22, 3, 7, 1, 8, 5]])
# Pair each neighbour's distance with its row position.
list(zip(distance.tolist()[0], indices.tolist()[0]))
[(0.0, 2), (0.4713243830572561, 6), (0.5141030253286081, 22), (0.5889776911252591, 3), (0.5895285707784645, 7), (0.6176587445860691, 1), (0.6321763125811057, 8), (0.6480394832572696, 5)]
# Tally review polarity signs. The misspelled name 'netural' is kept
# because the pie-chart cell below references it.
val= list(df['sentiment_2'].values)
netural=0
negative=0
positive=0
for i in val:
    if i > 0:
        positive += 1
    elif i < 0:
        negative += 1
    else:  # polarity exactly 0 counts as neutral
        netural += 1
print('Neutrals', netural)
print('Positive', positive)
print('Negative', negative)
Neutrals 85 Positive 451 Negative 98
import matplotlib.pyplot as plt
import numpy as np
# Pie chart of the sentiment distribution; 'shad' is the per-wedge
# explode offset, autopct prints each share to one decimal place.
p= np.array([netural,positive,negative])
mylabel=['Neutral',' Positive', 'Negative']
shad = [0.3,0.3,0.4]
plt.pie(p, labels=mylabel, explode=shad, shadow= True, autopct ="%2.1f")
plt.show()
#vals = df['Rating'].value_counts().tolist()
#labs = df['Rating'].unique().tolist()
#plt.bar(labs, vals)
#plt.xlabel("Rating")
#plt.ylabel("No. Of Reviews")
#plt.show()
#vals
# Count reviews per star rating (0-5). The individual counters are
# reused by the bar-chart cell below, so the names must stay.
# NOTE(review): df['Rating'].value_counts() would do this in one call.
one=0
two=0
three=0
four=0
five=0
zero=0
vals = list(df['Rating'].values)
for i in vals:
    if i == 0:
        zero += 1
    elif i == 1:
        one += 1
    elif i == 2:
        two += 1
    elif i == 3:
        three += 1
    elif i == 4:
        four += 1
    elif i == 5:
        five += 1
print('0 Rating', zero)
print('1 Rating', one)
print('2 Rating', two)
print('3 Rating', three)
print('4 Rating', four)
print('5 Rating', five)
0 Rating 1 1 Rating 125 2 Rating 34 3 Rating 48 4 Rating 86 5 Rating 340
# Bar chart: number of reviews per star rating, using the counters above.
x_rating = np.array([0,1,2,3,4,5])
y_review = np.array([zero,one,two,three,four,five])
plt.bar(x_rating,y_review, width=0.5, color='yellow')
plt.xlabel("Rating from 0-5")
plt.ylabel("No. of Review")
plt.show()