cossim2.py

Created by ews31415

Created on August 06, 2023

902 Bytes

Calculates the cosine similarity between two phrases. Enter the phrases without punctuation: words are split on whitespace and must match exactly (the comparison is case-sensitive).

Video from StatQuest (January 29, 2023): https://www.youtube.com/watch?v=e9U0QAFbfLI


# phrases program
# 2023-08-06 ews

from math import *

# subroutines
def unique(l):
  """Return the distinct items of l as a list, in first-seen order.

  Items are compared with ==, via a membership test on the result list,
  so they do not need to be hashable.
  """
  seen = []
  for item in l:
    if item in seen:
      # already recorded; keep only the first occurrence
      continue
    seen.append(item)
  return seen

def counta(lmain,lsrc):
  """Count how many times each item of lmain occurs in lsrc.

  Returns a list with one count per item of lmain, in lmain's order.
  """
  tallies = []
  for word in lmain:
    tallies.append(lsrc.count(word))
  return tallies

def norm(v):
  """Return the Euclidean norm of v: the square root of the sum of squares."""
  return sqrt(sum(component**2 for component in v))

# main program
# Reads two phrases, turns each into a word-count vector over the combined
# vocabulary of both phrases, and prints the cosine similarity of the vectors.
print("\nDo not use punctuation")
str1=input("phrase 1? ")
str2=input("phrase 2? ")

# split each phrase into a list of whitespace-separated words
list1=str1.split()
list2=str2.split()

# vocabulary: every distinct word from either phrase, in first-seen order
list3=unique(list1+list2)

# word count: one entry per vocabulary word, so the two vectors line up
listc1=counta(list3,list1)
listc2=counta(list3,list2)

# vector operations
# norm
n1=norm(listc1)
n2=norm(listc2)
# dot
d=sum(a*b for a,b in zip(listc1,listc2))

# cosine similarity
# An empty (or whitespace-only) phrase gives an all-zero count vector whose
# norm is 0, so dividing by n1*n2 would raise ZeroDivisionError. Such a
# phrase shares no words with the other one, so report 0.0, which matches
# the legend printed below.
if n1==0 or n2==0:
  c=0.0
else:
  c=d/(n1*n2)
# no need to take the arccosine
print("cosine similarity: ")
print(c)
print("\n0: no words in common \n1: all words in common")