In [1]:
import pandas as pd
# Load the product-review dataset
df = pd.read_csv(r'D:\BB_internship_Tasks\cloths-rating.csv')
df
Out[1]:
ProductID UserID Rating Text
0 777 AV1YnR7wglJLPUi8IJmi 4 Great taffy at a great price.
1 767 AVpfpK8KLJeJML43BCuD 4 Absolutely wonderful - silky and sexy and comf...
2 1080 AVqkIdntQMlgsOJE6fuB 5 Love this dress! it's sooo pretty.
3 1077 AVpfpK8KLJeJML43BCuD 3 I had such high hopes for this dress and reall...
4 1049 AVpfpK8KLJeJML43BCuD 5 I love, love, love this jumpsuit. it's fun, fl...
... ... ... ... ...
629 823 B08GWV3SM6 1 I placed order 4+1 soaps.But I have received w...
630 823 B08GWV3SM6 3 The soap is ok for bathing, no scent at all, m...
631 847 B08GWV3SM6 5 For a long time I was searching for Indian soa...
632 910 AVph0EeEilAPnD_x9myq 3 Good but not great
633 333 AVqkIdntQMlgsOJE6fuB 5 Quick,easy to make & tasty too.

634 rows × 4 columns

In [2]:
import numpy as np
!pip install textblob
from textblob import TextBlob
Requirement already satisfied: textblob in c:\anaconda\lib\site-packages (0.17.1)
Requirement already satisfied: nltk>=3.1 in c:\anaconda\lib\site-packages (from textblob) (3.7)
Requirement already satisfied: tqdm in c:\anaconda\lib\site-packages (from nltk>=3.1->textblob) (4.64.0)
Requirement already satisfied: regex>=2021.8.3 in c:\anaconda\lib\site-packages (from nltk>=3.1->textblob) (2022.3.15)
Requirement already satisfied: click in c:\anaconda\lib\site-packages (from nltk>=3.1->textblob) (8.0.4)
Requirement already satisfied: joblib in c:\anaconda\lib\site-packages (from nltk>=3.1->textblob) (1.1.0)
Requirement already satisfied: colorama in c:\anaconda\lib\site-packages (from click->nltk>=3.1->textblob) (0.4.4)
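TextBlob's sentiment.polarity is a float in [-1.0, 1.0], running from most negative to most positive; the helper in the next cell just wraps it. A quick illustrative check (a sketch, assuming the install above succeeded):

# Illustrative polarity values (exact numbers depend on the TextBlob version)
print(TextBlob("Great taffy at a great price.").sentiment.polarity)      # positive, ~0.8
print(TextBlob("Terrible quality, broke in a day.").sentiment.polarity)  # negative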
In [3]:
def sentiment(text):
    """Return the TextBlob polarity of a review in [-1, 1], or None if analysis fails."""
    try:
        return TextBlob(str(text)).sentiment.polarity
    except Exception:
        return None
df['sentiment'] = df['Text'].apply(sentiment)
df
Out[3]:
ProductID UserID Rating Text sentiment
0 777 AV1YnR7wglJLPUi8IJmi 4 Great taffy at a great price. 0.800000
1 767 AVpfpK8KLJeJML43BCuD 4 Absolutely wonderful - silky and sexy and comf... 0.633333
2 1080 AVqkIdntQMlgsOJE6fuB 5 Love this dress! it's sooo pretty. 0.437500
3 1077 AVpfpK8KLJeJML43BCuD 3 I had such high hopes for this dress and reall... 0.120000
4 1049 AVpfpK8KLJeJML43BCuD 5 I love, love, love this jumpsuit. it's fun, fl... 0.550000
... ... ... ... ... ...
629 823 B08GWV3SM6 1 I placed order 4+1 soaps.But I have received w... 0.000000
630 823 B08GWV3SM6 3 The soap is ok for bathing, no scent at all, m... 0.325000
631 847 B08GWV3SM6 5 For a long time I was searching for Indian soa... -0.025000
632 910 AVph0EeEilAPnD_x9myq 3 Good but not great 0.150000
633 333 AVqkIdntQMlgsOJE6fuB 5 Quick,easy to make & tasty too. 0.000000

634 rows × 5 columns

In [4]:
# Weight each star rating by the review's sentiment polarity; the product lies in [-5, 5]
df['Updated_score'] = df['Rating'] * df['sentiment']
df
Out[4]:
ProductID UserID Rating Text sentiment Updated_score
0 777 AV1YnR7wglJLPUi8IJmi 4 Great taffy at a great price. 0.800000 3.200000
1 767 AVpfpK8KLJeJML43BCuD 4 Absolutely wonderful - silky and sexy and comf... 0.633333 2.533333
2 1080 AVqkIdntQMlgsOJE6fuB 5 Love this dress! it's sooo pretty. 0.437500 2.187500
3 1077 AVpfpK8KLJeJML43BCuD 3 I had such high hopes for this dress and reall... 0.120000 0.360000
4 1049 AVpfpK8KLJeJML43BCuD 5 I love, love, love this jumpsuit. it's fun, fl... 0.550000 2.750000
... ... ... ... ... ... ...
629 823 B08GWV3SM6 1 I placed order 4+1 soaps.But I have received w... 0.000000 0.000000
630 823 B08GWV3SM6 3 The soap is ok for bathing, no scent at all, m... 0.325000 0.975000
631 847 B08GWV3SM6 5 For a long time I was searching for Indian soa... -0.025000 -0.125000
632 910 AVph0EeEilAPnD_x9myq 3 Good but not great 0.150000 0.450000
633 333 AVqkIdntQMlgsOJE6fuB 5 Quick,easy to make & tasty too. 0.000000 0.000000

634 rows × 6 columns

In [5]:
# Enumerate every possible Updated_score: each sentiment bin edge times each rating 1-5
b = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1]
l = []
for i in b:
    for j in range(1, 6):
        l.append(i * j)
print(l)
len(l)
[-1, -2, -3, -4, -5, -0.75, -1.5, -2.25, -3.0, -3.75, -0.5, -1.0, -1.5, -2.0, -2.5, -0.25, -0.5, -0.75, -1.0, -1.25, 0, 0, 0, 0, 0, 0.25, 0.5, 0.75, 1.0, 1.25, 0.5, 1.0, 1.5, 2.0, 2.5, 0.75, 1.5, 2.25, 3.0, 3.75, 1, 2, 3, 4, 5]
Out[5]:
45
In [6]:
len(set(l))   # only 27 of the 45 values are distinct
Out[6]:
27
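The same enumeration can be written as an outer product (a sketch using NumPy, reusing b from the cell above), which confirms the 27 distinct values:

# Sketch: bin edges times ratings, then count the distinct products
vals = np.outer(b, range(1, 6))
print(len(np.unique(vals)))   # 27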
In [7]:
def equalizer(v):
    """Bucket an Updated_score in [-5, 5] back onto a 1-5 scale."""
    if v <= 0:
        return 1
    elif v <= 2:
        return 2
    elif v <= 3:
        return 3
    elif v <= 4:
        return 4
    else:   # 4 < v <= 5
        return 5

df['New_score'] = df['Updated_score'].apply(equalizer)
df
Out[7]:
ProductID UserID Rating Text sentiment Updated_score New_score
0 777 AV1YnR7wglJLPUi8IJmi 4 Great taffy at a great price. 0.800000 3.200000 4
1 767 AVpfpK8KLJeJML43BCuD 4 Absolutely wonderful - silky and sexy and comf... 0.633333 2.533333 3
2 1080 AVqkIdntQMlgsOJE6fuB 5 Love this dress! it's sooo pretty. 0.437500 2.187500 3
3 1077 AVpfpK8KLJeJML43BCuD 3 I had such high hopes for this dress and reall... 0.120000 0.360000 2
4 1049 AVpfpK8KLJeJML43BCuD 5 I love, love, love this jumpsuit. it's fun, fl... 0.550000 2.750000 3
... ... ... ... ... ... ... ...
629 823 B08GWV3SM6 1 I placed order 4+1 soaps.But I have received w... 0.000000 0.000000 1
630 823 B08GWV3SM6 3 The soap is ok for bathing, no scent at all, m... 0.325000 0.975000 2
631 847 B08GWV3SM6 5 For a long time I was searching for Indian soa... -0.025000 -0.125000 1
632 910 AVph0EeEilAPnD_x9myq 3 Good but not great 0.150000 0.450000 2
633 333 AVqkIdntQMlgsOJE6fuB 5 Quick,easy to make & tasty too. 0.000000 0.000000 1

634 rows × 7 columns
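For larger frames the same bucketing can be vectorized (a sketch using pd.cut with the bin edges implied by equalizer; New_score_cut is a hypothetical scratch column, not used later):

# Sketch: right-closed intervals (-inf,0], (0,2], (2,3], (3,4], (4,5] match the <= checks
bins = [-np.inf, 0, 2, 3, 4, 5]
df['New_score_cut'] = pd.cut(df['Updated_score'], bins=bins, labels=[1, 2, 3, 4, 5]).astype(int)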

Label-encoding UserID and ProductID

In [8]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['UserID'] = le.fit_transform(df['UserID'])
df
Out[8]:
ProductID UserID Rating Text sentiment Updated_score New_score
0 777 0 4 Great taffy at a great price. 0.800000 3.200000 4
1 767 3 4 Absolutely wonderful - silky and sexy and comf... 0.633333 2.533333 3
2 1080 13 5 Love this dress! it's sooo pretty. 0.437500 2.187500 3
3 1077 3 3 I had such high hopes for this dress and reall... 0.120000 0.360000 2
4 1049 3 5 I love, love, love this jumpsuit. it's fun, fl... 0.550000 2.750000 3
... ... ... ... ... ... ... ...
629 823 41 1 I placed order 4+1 soaps.But I have received w... 0.000000 0.000000 1
630 823 41 3 The soap is ok for bathing, no scent at all, m... 0.325000 0.975000 2
631 847 41 5 For a long time I was searching for Indian soa... -0.025000 -0.125000 1
632 910 7 3 Good but not great 0.150000 0.450000 2
633 333 13 5 Quick,easy to make & tasty too. 0.000000 0.000000 1

634 rows × 7 columns
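Because the fitted encoder is kept in le, the original user strings can be recovered whenever they are needed (a sketch):

# Sketch: map the integer codes back to the original UserID strings
original_users = le.inverse_transform(df['UserID'])
print(original_users[:3])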

In [9]:
#from sklearn.preprocessing import LabelEncoder

#le = LabelEncoder()
#df['ProductID'] = le.fit_transform(df['ProductID'])
#df
In [10]:
# We would map each ProductID to a product name; the names are given in a JSON file named pname.json

#le = LabelEncoder()
#df['ProductNAME'] = le.fit_transform(df['ProductID'])
#df

Building the product-user matrix with a pivot table (on the label-encoded IDs)¶

In [11]:
# Rows = products, columns = users; duplicate (product, user) pairs are averaged
# (pivot_table's default aggfunc is 'mean') and unrated cells are filled with 0
df_pivot = df.pivot_table(index='ProductID', columns='UserID', values='New_score').fillna(0)
df_pivot
Out[11]:
UserID 0 1 2 3 4 5 6 7 8 9 ... 32 33 34 35 36 37 38 39 40 41
ProductID
89 0.0 0.000000 0.000000 2.0 0.0 0.0 5.00 1.000000 2.0 0.0 ... 0.000000 2.00 0.0 0.0 0.000000 3.000000 0.000000 0.000000 0.000000 3.00
333 0.0 2.000000 0.000000 0.0 0.0 3.0 0.00 0.000000 4.0 1.0 ... 0.000000 1.00 3.0 0.0 0.000000 1.750000 0.000000 0.000000 0.000000 0.00
369 4.0 1.000000 0.000000 2.0 0.0 3.0 4.00 2.000000 0.0 0.0 ... 0.000000 0.00 0.0 0.0 0.000000 0.000000 1.333333 0.000000 5.000000 0.00
444 2.0 2.000000 0.000000 0.0 1.0 0.0 3.00 5.000000 2.0 0.0 ... 2.000000 0.00 2.5 0.0 0.000000 1.666667 0.000000 0.000000 2.666667 0.00
684 0.0 0.000000 3.000000 3.0 2.0 0.0 1.00 2.000000 2.0 0.0 ... 2.500000 0.00 3.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 2.25
697 0.0 1.000000 2.000000 0.0 0.0 0.0 4.00 0.000000 1.0 2.0 ... 3.000000 2.00 3.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 2.50
767 2.0 0.000000 0.000000 3.0 0.0 0.0 2.00 0.000000 0.0 0.0 ... 0.000000 1.00 0.0 0.0 2.000000 0.000000 1.000000 1.000000 0.000000 0.00
777 4.0 0.000000 0.000000 0.0 0.0 0.0 1.00 0.000000 0.0 2.0 ... 0.000000 1.00 0.0 0.0 0.000000 0.000000 4.000000 0.000000 0.000000 0.00
823 0.0 0.000000 0.000000 4.0 0.0 0.0 5.00 1.000000 0.0 0.0 ... 0.000000 5.00 0.0 0.0 0.000000 4.000000 0.000000 0.000000 0.000000 1.50
847 0.0 0.000000 0.000000 2.0 0.0 0.0 2.00 2.333333 5.0 0.0 ... 0.000000 3.00 0.0 0.0 3.000000 0.000000 0.000000 2.000000 0.000000 1.00
853 0.0 0.000000 1.000000 0.0 0.0 0.0 1.00 0.000000 0.0 0.0 ... 0.000000 0.00 0.0 0.0 1.000000 0.000000 0.000000 2.000000 0.000000 0.00
858 0.0 0.000000 0.000000 0.0 2.5 0.0 3.00 2.000000 2.0 4.0 ... 0.000000 0.00 0.0 0.0 0.000000 0.000000 0.000000 4.000000 0.000000 0.00
862 2.0 2.333333 0.000000 3.0 2.5 0.0 2.25 1.000000 2.5 0.0 ... 1.666667 2.75 5.0 0.0 0.000000 3.200000 0.000000 0.000000 0.000000 0.00
910 0.0 2.666667 0.000000 0.0 0.0 0.0 3.00 3.000000 3.5 0.0 ... 0.000000 3.00 4.0 0.0 0.000000 2.000000 0.000000 0.000000 0.000000 0.00
949 2.0 2.000000 2.000000 0.0 0.0 0.0 1.00 0.000000 3.0 0.0 ... 2.000000 0.00 2.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 2.00
1002 4.0 4.000000 1.000000 0.0 0.0 0.0 1.00 0.000000 0.0 0.0 ... 1.000000 0.00 2.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.00
1003 0.0 0.000000 0.000000 0.0 0.0 0.0 1.00 0.000000 2.0 0.0 ... 2.000000 0.00 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.00
1049 2.0 3.000000 0.000000 3.0 0.0 0.0 0.00 0.000000 0.0 0.0 ... 0.000000 0.00 0.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.00
1060 1.5 2.000000 1.333333 0.0 2.0 0.0 0.00 2.000000 2.0 0.0 ... 2.250000 0.00 2.0 0.0 0.000000 0.000000 0.000000 0.000000 0.000000 0.00
1065 0.0 0.000000 0.000000 1.0 0.0 0.0 0.00 0.000000 0.0 0.0 ... 0.000000 0.00 0.0 0.0 2.500000 0.000000 0.000000 3.000000 0.000000 0.00
1077 0.0 2.000000 0.000000 2.0 1.0 0.0 1.50 1.500000 1.0 2.0 ... 0.000000 0.00 0.0 0.0 1.333333 2.000000 0.000000 3.166667 0.000000 0.00
1080 0.0 2.000000 0.000000 0.0 1.0 0.0 2.00 1.000000 0.0 1.0 ... 0.000000 4.00 0.0 0.0 1.000000 0.000000 0.000000 4.000000 0.000000 0.00
1095 0.0 0.000000 0.000000 1.5 3.0 0.0 0.00 1.666667 0.0 2.0 ... 0.000000 0.00 0.0 4.0 0.000000 0.000000 1.500000 1.500000 3.500000 0.00
1120 0.0 0.000000 2.000000 0.0 0.0 0.0 0.00 0.000000 0.0 0.0 ... 0.000000 0.00 0.0 0.0 2.000000 0.000000 0.000000 1.000000 0.000000 0.00
6969 0.0 0.000000 0.000000 0.0 0.0 0.0 0.00 0.000000 0.0 3.0 ... 0.000000 3.00 0.0 0.0 0.000000 3.000000 0.000000 0.000000 0.000000 0.00
8001 0.0 2.000000 0.000000 0.0 0.0 0.0 0.00 0.000000 0.0 3.5 ... 0.000000 0.00 0.0 0.0 0.000000 0.000000 2.000000 0.000000 1.000000 0.00
9696 1.0 2.000000 0.000000 2.0 0.0 0.0 1.00 2.500000 1.0 1.0 ... 0.000000 2.75 0.0 0.0 0.000000 1.500000 0.000000 0.000000 0.000000 0.00

27 rows × 42 columns
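Most (product, user) cells are unrated zeros, which is what motivates the compressed representation in the next section. A quick measurement (sketch):

# Fraction of zero entries in the pivot table
sparsity = (df_pivot.values == 0).mean()
print(f"{sparsity:.1%} of the 27 x 42 matrix is zeros")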

Sparse matrix (Compressed Sparse Row)¶

In [12]:
from scipy.sparse import csr_matrix

df_pivot_matrix = csr_matrix(df_pivot.values)
print(df_pivot_matrix)
  (0, 3)	2.0
  (0, 6)	5.0
  (0, 7)	1.0
  (0, 8)	2.0
  (0, 11)	3.0
  (0, 16)	2.5
  (0, 18)	3.0
  (0, 20)	1.0
  (0, 22)	4.0
  (0, 28)	3.0
  (0, 33)	2.0
  (0, 37)	3.0
  (0, 41)	3.0
  (1, 1)	2.0
  (1, 5)	3.0
  (1, 8)	4.0
  (1, 9)	1.0
  (1, 10)	2.0
  (1, 13)	2.0
  (1, 17)	1.25
  (1, 22)	4.0
  (1, 25)	1.25
  (1, 29)	2.5
  (1, 33)	1.0
  (1, 34)	3.0
  :	:
  (25, 17)	4.0
  (25, 21)	2.0
  (25, 22)	1.0
  (25, 25)	2.0
  (25, 30)	3.3333333333333335
  (25, 38)	2.0
  (25, 40)	1.0
  (26, 0)	1.0
  (26, 1)	2.0
  (26, 3)	2.0
  (26, 6)	1.0
  (26, 7)	2.5
  (26, 8)	1.0
  (26, 9)	1.0
  (26, 11)	2.0
  (26, 16)	2.0
  (26, 17)	1.6
  (26, 18)	2.3333333333333335
  (26, 19)	1.0
  (26, 22)	1.0
  (26, 23)	2.0
  (26, 25)	3.3333333333333335
  (26, 28)	2.0
  (26, 33)	2.75
  (26, 37)	1.5

28-06-2022¶

In [13]:
from textblob import TextBlob
from sklearn.metrics.pairwise import cosine_similarity
In [14]:
similarity_matrix = cosine_similarity(df_pivot)
similarity_matrix
Out[14]:
array([[1.        , 0.34676265, 0.33723697, 0.38107647, 0.49477232,
        0.4627069 , 0.26858532, 0.26651864, 0.77150131, 0.31717595,
        0.12426828, 0.32679076, 0.48985768, 0.43446939, 0.24805411,
        0.07312724, 0.26360285, 0.0849059 , 0.17477676, 0.07714055,
        0.36102125, 0.32580206, 0.15758538, 0.        , 0.48787147,
        0.14209634, 0.6188595 ],
       [0.34676265, 1.        , 0.38234126, 0.27725706, 0.24058911,
        0.44108693, 0.17812137, 0.04713354, 0.26572428, 0.32450053,
        0.10424852, 0.18953488, 0.45902114, 0.46497647, 0.40290215,
        0.24140622, 0.16303523, 0.10010346, 0.36632991, 0.        ,
        0.21012683, 0.16645129, 0.16442598, 0.        , 0.40356568,
        0.29916176, 0.34769634],
       [0.33723697, 0.38234126, 1.        , 0.41102231, 0.15576679,
        0.35196052, 0.52867562, 0.41047143, 0.36782369, 0.34109431,
        0.13971053, 0.20874106, 0.28451058, 0.23410013, 0.22394512,
        0.33097939, 0.06519597, 0.33803328, 0.27467108, 0.02078238,
        0.33898223, 0.18037318, 0.48589697, 0.        , 0.11129753,
        0.34826045, 0.30593496],
       [0.38107647, 0.27725706, 0.41102231, 1.        , 0.62813508,
        0.51757807, 0.32052195, 0.34077389, 0.33004843, 0.41336535,
        0.42167198, 0.38264425, 0.55974449, 0.55331328, 0.607712  ,
        0.41140598, 0.25048207, 0.14474891, 0.46625563, 0.        ,
        0.52756832, 0.45944987, 0.3116387 , 0.        , 0.11961723,
        0.27108714, 0.49497036],
       [0.49477232, 0.24058911, 0.15576679, 0.62813508, 1.        ,
        0.56489663, 0.21779944, 0.20179174, 0.31660479, 0.31055744,
        0.39313159, 0.34284119, 0.52909726, 0.4205376 , 0.45651157,
        0.24160368, 0.34800801, 0.13149326, 0.43943034, 0.0568891 ,
        0.35229527, 0.25257893, 0.13504185, 0.11786218, 0.0812434 ,
        0.05239613, 0.42389844],
       [0.4627069 , 0.44108693, 0.35196052, 0.51757807, 0.56489663,
        1.        , 0.3374719 , 0.20944919, 0.54894968, 0.50589621,
        0.54157829, 0.34678732, 0.50189295, 0.49485643, 0.59931599,
        0.33679247, 0.43595531, 0.04655236, 0.3927721 , 0.16917906,
        0.36581576, 0.39361578, 0.16589032, 0.20863287, 0.29337732,
        0.21146687, 0.28769927],
       [0.26858532, 0.17812137, 0.52867562, 0.32052195, 0.21779944,
        0.3374719 , 1.        , 0.42306669, 0.42672391, 0.64658897,
        0.44328343, 0.36475482, 0.31119524, 0.21586737, 0.25855073,
        0.1721648 , 0.07399165, 0.68146193, 0.15586368, 0.38327503,
        0.66607827, 0.42450317, 0.48568076, 0.40721272, 0.08420856,
        0.2425777 , 0.26683661],
       [0.26651864, 0.04713354, 0.41047143, 0.34077389, 0.20179174,
        0.20944919, 0.42306669, 1.        , 0.23319906, 0.20041728,
        0.10827631, 0.2935387 , 0.21051075, 0.06590964, 0.19927088,
        0.35203403, 0.0244741 , 0.28050536, 0.15122737, 0.03120622,
        0.33603027, 0.35123294, 0.34137671, 0.        , 0.20054543,
        0.31136764, 0.34737312],
       [0.77150131, 0.26572428, 0.36782369, 0.33004843, 0.31660479,
        0.54894968, 0.42672391, 0.23319906, 1.        , 0.45509476,
        0.27293185, 0.26214854, 0.5268653 , 0.39301529, 0.20092067,
        0.06735267, 0.2308468 , 0.15640241, 0.05962049, 0.16239785,
        0.39003611, 0.39483518, 0.2365806 , 0.07009458, 0.5073263 ,
        0.05608952, 0.54508806],
       [0.31717595, 0.32450053, 0.34109431, 0.41336535, 0.31055744,
        0.50589621, 0.64658897, 0.20041728, 0.45509476, 1.        ,
        0.63097054, 0.38461231, 0.36049923, 0.39218556, 0.43803484,
        0.04888034, 0.29968391, 0.36653324, 0.31009315, 0.53634678,
        0.67622201, 0.53297672, 0.34672634, 0.48485711, 0.11834538,
        0.21201175, 0.29200755],
       [0.12426828, 0.10424852, 0.13971053, 0.42167198, 0.39313159,
        0.54157829, 0.44328343, 0.10827631, 0.27293185, 0.63097054,
        1.        , 0.37473303, 0.27758417, 0.40975132, 0.43061606,
        0.10398078, 0.43890054, 0.17247006, 0.13587395, 0.57082201,
        0.55913724, 0.54918159, 0.16805486, 0.56039301, 0.01598415,
        0.13401211, 0.12123435],
       [0.32679076, 0.18953488, 0.20874106, 0.38264425, 0.34284119,
        0.34678732, 0.36475482, 0.2935387 , 0.26214854, 0.38461231,
        0.37473303, 1.        , 0.3363193 , 0.35628047, 0.18325139,
        0.09828731, 0.2516869 , 0.22189711, 0.24651074, 0.28388963,
        0.5918123 , 0.51856238, 0.46243876, 0.21310118, 0.22915223,
        0.20462767, 0.3983947 ],
       [0.48985768, 0.45902114, 0.28451058, 0.55974449, 0.52909726,
        0.50189295, 0.31119524, 0.21051075, 0.5268653 , 0.36049923,
        0.27758417, 0.3363193 , 1.        , 0.79777694, 0.59505411,
        0.4518857 , 0.44505984, 0.28271758, 0.57344589, 0.19456559,
        0.458129  , 0.43234569, 0.19036497, 0.08839885, 0.37699043,
        0.24207619, 0.56021353],
       [0.43446939, 0.46497647, 0.23410013, 0.55331328, 0.4205376 ,
        0.49485643, 0.21586737, 0.06590964, 0.39301529, 0.39218556,
        0.40975132, 0.35628047, 0.79777694, 1.        , 0.6308363 ,
        0.32149781, 0.5272081 , 0.09332042, 0.53716251, 0.16351488,
        0.41563538, 0.48059499, 0.1013236 , 0.06273486, 0.38919306,
        0.41275791, 0.55455379],
       [0.24805411, 0.40290215, 0.22394512, 0.607712  , 0.45651157,
        0.59931599, 0.25855073, 0.19927088, 0.20092067, 0.43803484,
        0.43061606, 0.18325139, 0.59505411, 0.6308363 , 1.        ,
        0.49208254, 0.46849452, 0.15870628, 0.61950688, 0.16479003,
        0.41651019, 0.50257693, 0.10604121, 0.17070504, 0.12061015,
        0.4970641 , 0.26864098],
       [0.07312724, 0.24140622, 0.33097939, 0.41140598, 0.24160368,
        0.33679247, 0.1721648 , 0.35203403, 0.06735267, 0.04888034,
        0.10398078, 0.09828731, 0.4518857 , 0.32149781, 0.49208254,
        1.        , 0.13430383, 0.4397995 , 0.43421673, 0.        ,
        0.25991114, 0.18119544, 0.0587713 , 0.05913124, 0.        ,
        0.22081116, 0.26142734],
       [0.26360285, 0.16303523, 0.06519597, 0.25048207, 0.34800801,
        0.43595531, 0.07399165, 0.0244741 , 0.2308468 , 0.29968391,
        0.43890054, 0.2516869 , 0.44505984, 0.5272081 , 0.46849452,
        0.13430383, 1.        , 0.        , 0.38043379, 0.45538256,
        0.30774166, 0.35901639, 0.        , 0.2445998 , 0.14451833,
        0.18640778, 0.22069578],
       [0.0849059 , 0.10010346, 0.33803328, 0.14474891, 0.13149326,
        0.04655236, 0.68146193, 0.28050536, 0.15640241, 0.36653324,
        0.17247006, 0.22189711, 0.28271758, 0.09332042, 0.15870628,
        0.4397995 , 0.        , 1.        , 0.21412044, 0.34795214,
        0.49196503, 0.25781951, 0.30564773, 0.34327774, 0.        ,
        0.19330039, 0.32521673],
       [0.17477676, 0.36632991, 0.27467108, 0.46625563, 0.43943034,
        0.3927721 , 0.15586368, 0.15122737, 0.05962049, 0.31009315,
        0.13587395, 0.24651074, 0.57344589, 0.53716251, 0.61950688,
        0.43421673, 0.38043379, 0.21412044, 1.        , 0.09095253,
        0.342595  , 0.33258158, 0.19942638, 0.06979052, 0.12988944,
        0.34903879, 0.30579854],
       [0.07714055, 0.        , 0.02078238, 0.        , 0.0568891 ,
        0.16917906, 0.38327503, 0.03120622, 0.16239785, 0.53634678,
        0.57082201, 0.28388963, 0.19456559, 0.16351488, 0.16479003,
        0.        , 0.45538256, 0.34795214, 0.09095253, 1.        ,
        0.54114495, 0.55012884, 0.17713353, 0.8131213 , 0.07370846,
        0.09507327, 0.14070141],
       [0.36102125, 0.21012683, 0.33898223, 0.52756832, 0.35229527,
        0.36581576, 0.66607827, 0.33603027, 0.39003611, 0.67622201,
        0.55913724, 0.5918123 , 0.458129  , 0.41563538, 0.41651019,
        0.25991114, 0.30774166, 0.49196503, 0.342595  , 0.54114495,
        1.        , 0.70826251, 0.55259636, 0.41694702, 0.24452784,
        0.47836519, 0.43196319],
       [0.32580206, 0.16645129, 0.18037318, 0.45944987, 0.25257893,
        0.39361578, 0.42450317, 0.35123294, 0.39483518, 0.53297672,
        0.54918159, 0.51856238, 0.43234569, 0.48059499, 0.50257693,
        0.18119544, 0.35901639, 0.25781951, 0.33258158, 0.55012884,
        0.70826251, 1.        , 0.39620886, 0.38823622, 0.27526065,
        0.42654893, 0.48992653],
       [0.15758538, 0.16442598, 0.48589697, 0.3116387 , 0.13504185,
        0.16589032, 0.48568076, 0.34137671, 0.2365806 , 0.34672634,
        0.16805486, 0.46243876, 0.19036497, 0.1013236 , 0.10604121,
        0.0587713 , 0.        , 0.30564773, 0.19942638, 0.17713353,
        0.55259636, 0.39620886, 1.        , 0.14335282, 0.14229261,
        0.42315459, 0.36417344],
       [0.        , 0.        , 0.        , 0.        , 0.11786218,
        0.20863287, 0.40721272, 0.        , 0.07009458, 0.48485711,
        0.56039301, 0.21310118, 0.08839885, 0.06273486, 0.17070504,
        0.05913124, 0.2445998 , 0.34327774, 0.06979052, 0.8131213 ,
        0.41694702, 0.38823622, 0.14335282, 1.        , 0.        ,
        0.        , 0.        ],
       [0.48787147, 0.40356568, 0.11129753, 0.11961723, 0.0812434 ,
        0.29337732, 0.08420856, 0.20054543, 0.5073263 , 0.11834538,
        0.01598415, 0.22915223, 0.37699043, 0.38919306, 0.12061015,
        0.        , 0.14451833, 0.        , 0.12988944, 0.07370846,
        0.24452784, 0.27526065, 0.14229261, 0.        , 1.        ,
        0.4497523 , 0.57635109],
       [0.14209634, 0.29916176, 0.34826045, 0.27108714, 0.05239613,
        0.21146687, 0.2425777 , 0.31136764, 0.05608952, 0.21201175,
        0.13401211, 0.20462767, 0.24207619, 0.41275791, 0.4970641 ,
        0.22081116, 0.18640778, 0.19330039, 0.34903879, 0.09507327,
        0.47836519, 0.42654893, 0.42315459, 0.        , 0.4497523 ,
        1.        , 0.3831519 ],
       [0.6188595 , 0.34769634, 0.30593496, 0.49497036, 0.42389844,
        0.28769927, 0.26683661, 0.34737312, 0.54508806, 0.29200755,
        0.12123435, 0.3983947 , 0.56021353, 0.55455379, 0.26864098,
        0.26142734, 0.22069578, 0.32521673, 0.30579854, 0.14070141,
        0.43196319, 0.48992653, 0.36417344, 0.        , 0.57635109,
        0.3831519 , 1.        ]])
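Each entry [i, j] is the cosine of the angle between product rows i and j, i.e. u.v / (|u||v|). A spot-check of one entry (sketch):

# Recompute the similarity between the first two products by hand
u, v = df_pivot.values[0], df_pivot.values[1]
manual = u @ v / (np.linalg.norm(u) * np.linalg.norm(v))
print(np.isclose(manual, similarity_matrix[0, 1]))   # True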
In [15]:
from sklearn.neighbors import NearestNeighbors

# k-NN model over the product vectors, using cosine distance
model_knn = NearestNeighbors(metric='cosine', n_neighbors=20, radius=1)
model_knn.fit(df_pivot_matrix)
Out[15]:
NearestNeighbors(metric='cosine', n_neighbors=20, radius=1)
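With metric='cosine', kneighbors reports cosine distance, i.e. 1 - cosine similarity, so a distance of 0.0 marks the query product itself. A quick consistency check against the matrix above (sketch):

# distance = 1 - similarity for the cosine metric
dist, idx = model_knn.kneighbors(df_pivot.iloc[[0]].values, n_neighbors=2)
print(np.isclose(dist[0, 1], 1 - similarity_matrix[0, idx[0, 1]]))   # True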

Pre-processing the review text using regular expressions¶

In [16]:
import json
In [17]:
with open(r'D:\BB_internship_Tasks\contractions.json', 'r') as f:
    contractions = json.load(f)   # maps contractions such as "don't" to "do not"
In [18]:
import re

def emoji(tweet):
    # Smile -- :), : ), :-), (:, ( :, (-:, :') , :O
    tweet = re.sub(r'(:\s?\)|:-\)|\(\s?:|\(-:|:\'\)|:O)', ' positiveemoji ', tweet)
    # Laugh -- :D, : D, :-D, xD, x-D, XD, X-D
    tweet = re.sub(r'(:\s?D|:-D|x-?D|X-?D)', ' positiveemoji ', tweet)
    # Love -- <3, :*
    tweet = re.sub(r'(<3|:\*)', ' positiveemoji ', tweet)
    # Wink -- ;-), ;), ;-D, ;D, (;,  (-; , @-)
    tweet = re.sub(r'(;-?\)|;-?D|\(-?;|@-\))', ' positiveemoji ', tweet)
    # Sad -- :-(, : (, :(, ):, )-:, :-/ , :-|
    tweet = re.sub(r'(:\s?\(|:-\(|\)\s?:|\)-:|:-/|:-\|)', ' negativeemoji ', tweet)
    # Cry -- :,(, :'(, :"(
    tweet = re.sub(r'(:,\(|:\'\(|:"\()', ' negativeemoji ', tweet)
    return tweet
In [19]:
import re

def process_tweet(tweet):
    tweet = tweet.lower()
    tweet = re.sub(r'^@[^\s]+', '', tweet)                            # drop a leading @mention
    tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', ' ', tweet)  # strip URLs
    tweet = re.sub(r"\d+", " ", tweet)                                # strip digits
    tweet = re.sub('&quot;', " ", tweet)                              # strip HTML-escaped quotes
    tweet = emoji(tweet)                                              # replace emoticons with tokens
    tweet = re.sub(r"\b[a-zA-Z]\b", "", tweet)                        # drop single-letter words
    for word in tweet.split():
        if word.lower() in contractions:
            tweet = tweet.replace(word, contractions[word.lower()])   # expand contractions
    tweet = re.sub(r"[^\w\s]", " ", tweet)                            # strip remaining punctuation
    tweet = re.sub(r'(.)\1+', r'\1\1', tweet)                         # squeeze character runs to two
    tweet = re.sub(r"\s+", " ", tweet)                                # collapse whitespace
    return tweet
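An illustrative run of the cleaning pipeline (a sketch; the example string is made up and the exact output also depends on the contractions file):

# URL stripped, emoticon tokenized, character runs squeezed, single letters dropped
print(process_tweet("I looove this dress!!! :) https://example.com"))
# -> ' loove this dress positiveemoji ' (approximately)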
In [20]:
from nltk.corpus import stopwords
stop = stopwords.words('english')   # English stop-word list (loaded but not applied below)
In [21]:
def sentiments_2(text):
    # Same polarity computation as sentiment() above, applied to the raw Text,
    # so sentiment_2 simply duplicates the existing sentiment column
    try:
        return TextBlob(str(text)).sentiment.polarity
    except Exception:
        return None
df['sentiment_2'] = df['Text'].apply(sentiments_2)
df
Out[21]:
ProductID UserID Rating Text sentiment Updated_score New_score sentiment_2
0 777 0 4 Great taffy at a great price. 0.800000 3.200000 4 0.800000
1 767 3 4 Absolutely wonderful - silky and sexy and comf... 0.633333 2.533333 3 0.633333
2 1080 13 5 Love this dress! it's sooo pretty. 0.437500 2.187500 3 0.437500
3 1077 3 3 I had such high hopes for this dress and reall... 0.120000 0.360000 2 0.120000
4 1049 3 5 I love, love, love this jumpsuit. it's fun, fl... 0.550000 2.750000 3 0.550000
... ... ... ... ... ... ... ... ...
629 823 41 1 I placed order 4+1 soaps.But I have received w... 0.000000 0.000000 1 0.000000
630 823 41 3 The soap is ok for bathing, no scent at all, m... 0.325000 0.975000 2 0.325000
631 847 41 5 For a long time I was searching for Indian soa... -0.025000 -0.125000 1 -0.025000
632 910 7 3 Good but not great 0.150000 0.450000 2 0.150000
633 333 13 5 Quick,easy to make & tasty too. 0.000000 0.000000 1 0.000000

634 rows × 8 columns

In [22]:
df['processed_text'] = df['Text'].apply(process_tweet)
df
Out[22]:
ProductID UserID Rating Text sentiment Updated_score New_score sentiment_2 processed_text
0 777 0 4 Great taffy at a great price. 0.800000 3.200000 4 0.800000 great taffy at great price
1 767 3 4 Absolutely wonderful - silky and sexy and comf... 0.633333 2.533333 3 0.633333 absolutely wonderful silky and sexy and comfor...
2 1080 13 5 Love this dress! it's sooo pretty. 0.437500 2.187500 3 0.437500 love this dress it soo pretty
3 1077 3 3 I had such high hopes for this dress and reall... 0.120000 0.360000 2 0.120000 had such high hopes for this dress and really...
4 1049 3 5 I love, love, love this jumpsuit. it's fun, fl... 0.550000 2.750000 3 0.550000 love love love this jumpsuit it fun flirty an...
... ... ... ... ... ... ... ... ... ...
629 823 41 1 I placed order 4+1 soaps.But I have received w... 0.000000 0.000000 1 0.000000 placed order soaps but have received without ...
630 823 41 3 The soap is ok for bathing, no scent at all, m... 0.325000 0.975000 2 0.325000 the soap is ok for bathing no scent at all mor...
631 847 41 5 For a long time I was searching for Indian soa... -0.025000 -0.125000 1 -0.025000 for long time was searching for indian soap eq...
632 910 7 3 Good but not great 0.150000 0.450000 2 0.150000 good but not great
633 333 13 5 Quick,easy to make & tasty too. 0.000000 0.000000 1 0.000000 quick easy to make tasty too

634 rows × 9 columns

04-07-2022: Finding the nearest products for a query product¶

In [23]:
product_ID = int(input('Enter Product Id: '))
data = df_pivot.index.to_list()   # ProductIDs in pivot-row order
data
Enter Product Id: 369
Out[23]:
[89,
 333,
 369,
 444,
 684,
 697,
 767,
 777,
 823,
 847,
 853,
 858,
 862,
 910,
 949,
 1002,
 1003,
 1049,
 1060,
 1065,
 1077,
 1080,
 1095,
 1120,
 6969,
 8001,
 9696]
In [24]:
query_index = data.index(product_ID)
print(query_index)
2
In [25]:
distance, indices = model_knn.kneighbors(df_pivot.iloc[query_index,:].values.reshape(1,-1), n_neighbors = 8)
print(distance)
print(indices)
[[0.         0.47132438 0.51410303 0.58897769 0.58952857 0.61765874
  0.63217631 0.64803948]]
[[ 2  6 22  3  7  1  8  5]]
In [26]:
neighbor_ids = df_pivot.index[indices.flatten()]   # map row positions back to ProductIDs
neighbor_ids
Out[26]:
Int64Index([369, 767, 1095, 444, 777, 333, 823, 697], dtype='int64', name='ProductID')
In [43]:
# Presenting the recommendations in tabular format

p_id = int(input('Enter product id: '))
data = df_pivot.index.to_list()
query_index = data.index(p_id)

distance, indices = model_knn.kneighbors(df_pivot.iloc[query_index, :].values.reshape(1, -1), n_neighbors=8)
x = pd.DataFrame(df_pivot.index[indices.flatten()])
x.drop(0)   # drop row 0: the nearest neighbour is always the query product itself
Enter product id: 369
Out[43]:
ProductID
1 767
2 1095
3 444
4 777
5 333
6 823
7 697
In [29]:
print(type(distance))
<class 'numpy.ndarray'>
In [30]:
distance.tolist(), indices.tolist()
Out[30]:
([[0.0,
   0.4713243830572561,
   0.5141030253286081,
   0.5889776911252591,
   0.5895285707784645,
   0.6176587445860691,
   0.6321763125811057,
   0.6480394832572696]],
 [[2, 6, 22, 3, 7, 1, 8, 5]])
In [31]:
list(zip(distance.tolist()[0], indices.tolist()[0]))
Out[31]:
[(0.0, 2),
 (0.4713243830572561, 6),
 (0.5141030253286081, 22),
 (0.5889776911252591, 3),
 (0.5895285707784645, 7),
 (0.6176587445860691, 1),
 (0.6321763125811057, 8),
 (0.6480394832572696, 5)]
In [32]:
val= list(df['sentiment_2'].values)
netural=0
negative=0
positive=0
for i in val:
    
    if i > 0:
        positive += 1
    elif i < 0:
        negative += 1
    else:
        netural += 1
    
print('Neutrals', netural)
print('Positive', positive)
print('Negative', negative)
Neutrals 85
Positive 451
Negative 98
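The same tally can be done in one line (sketch), since the sign of the polarity encodes the class:

# -1.0 = negative, 0.0 = neutral, 1.0 = positive
print(np.sign(df['sentiment_2']).value_counts())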
In [33]:
import matplotlib.pyplot as plt
import numpy as np

p = np.array([neutral, positive, negative])
mylabel = ['Neutral', 'Positive', 'Negative']
explode_vals = [0.3, 0.3, 0.4]   # offset of each wedge from the centre
plt.pie(p, labels=mylabel, explode=explode_vals, shadow=True, autopct="%2.1f")
plt.show()
In [34]:
#vals = df['Rating'].value_counts().tolist()
#labs = df['Rating'].unique().tolist()
#plt.bar(labs, vals)
#plt.xlabel("Rating")
#plt.ylabel("No. Of Reviews")
#plt.show()
#vals
In [35]:
one=0
two=0
three=0
four=0
five=0
zero=0
vals = list(df['Rating'].values)

for i in vals:
    if i == 0:
        zero += 1
    elif i == 1:
        one += 1
    elif i == 2:
        two += 1
    elif i == 3:
        three += 1
    elif i == 4:
        four += 1
    elif i == 5:
        five += 1

print('0 Rating', zero)
print('1 Rating', one)
print('2 Rating', two)
print('3 Rating', three)
print('4 Rating', four)
print('5 Rating', five)
0 Rating 1
1 Rating 125
2 Rating 34
3 Rating 48
4 Rating 86
5 Rating 340
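The if/elif tally above is equivalent to a single value_counts call (sketch):

# Reviews per star rating in one line
print(df['Rating'].value_counts().sort_index())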
In [36]:
x_rating = np.array([0,1,2,3,4,5])
y_review = np.array([zero,one,two,three,four,five])
plt.bar(x_rating,y_review, width=0.5, color='yellow')
plt.xlabel("Rating from 0-5")
plt.ylabel("No. of Review")
plt.show()