# Tavsiye Sistemi

Toby Segaran'ın "Programming Collective Intelligence" adlı kitabının 2. bölümünden esinlenerek tavsiye sistemlerine bakalım.

[u.data](data/u.data)
[u.item](data/u.item)

In [12]:
##############################################################################
# Sample data: each critic maps to the films they rated and the score
# they gave (scores in this sample run from 1.0 to 5.0).
# Not every critic has rated every film.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}

# Show one critic's full set of ratings, then a single rating lookup.
print("\nLisa Rose critics: ", critics['Lisa Rose'])
print("Lisa Rose critics for the film Lady in the Water: ",
      critics['Lisa Rose']['Lady in the Water'])


Lisa Rose critics:  {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, 'The Night Listener': 3.0}
Lisa Rose critics for the film Lady in the Water:  2.5


In [14]:
from math import sqrt
def dist(prefs, person1, person2, item):
    """Return the squared difference between two people's ratings of item.

    prefs maps each person to a dict of {item: rating}. Both people must
    have rated item, otherwise the lookup raises KeyError.
    """
    gap = prefs[person1][item] - prefs[person2][item]
    return gap ** 2

# Lisa Rose rated this film 2.5 and Gene Seymour 3.0, so the squared gap is 0.25.
print(dist(critics, 'Lisa Rose', 'Gene Seymour', 'Lady in the Water'))

0.25


In [15]:
# Both critics rated this film 3.5, so the squared gap is 0.0.
print(dist(critics, 'Lisa Rose', 'Gene Seymour', 'Snakes on a Plane'))

0.0


In [18]:
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for person1 and person2.

    The score is 1 / (1 + sum of squared rating differences) taken over
    the items both people have rated, so identical ratings give 1.0 and
    larger disagreements push the score towards 0. When the two people
    share no rated item, 0 is returned.
    """
    # Without a single item in common there is nothing to compare.
    if not any(item in prefs[person2] for item in prefs[person1]):
        return 0

    # Accumulate the squared differences over every shared item.
    sum_of_squares = sum(
        dist(prefs, person1, person2, item)
        for item in prefs[person1]
        if item in prefs[person2]
    )
    return 1 / (1 + sum_of_squares)

# Lisa Rose and Gene Seymour have both rated all six films, so every film contributes to the score.
print("\nSimilarity (using Euclidean distance)between Lisa Rose and Gene Seymour: ", sim_distance(critics,'Lisa Rose','Gene Seymour'))



Similarity (using Euclidean distance)between Lisa Rose and Gene Seymour:  0.14814814814814814


In [19]:
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient between p1 and p2.

    Only items rated by both p1 and p2 are taken into account. The
    coefficient nominally lies between -1 and 1. 0 is returned when the
    two have no rated item in common, or when the denominator is zero
    (for example when one of them gave every shared item the same rating).
    """
    # Items rated by both, in p1's iteration order.
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    n = len(shared)
    if n == 0:
        return 0

    xs = [prefs[p1][item] for item in shared]
    ys = [prefs[p2][item] for item in shared]

    # Plain sums, sums of squares and sum of products of the paired ratings.
    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_x_sq = sum(x ** 2 for x in xs)
    sum_y_sq = sum(y ** 2 for y in ys)
    sum_xy = sum(x * y for x, y in zip(xs, ys))

    numerator = sum_xy - (sum_x * sum_y / n)
    denominator = sqrt((sum_x_sq - sum_x ** 2 / n) * (sum_y_sq - sum_y ** 2 / n))
    if denominator == 0:
        return 0
    return numerator / denominator


# Same pair of critics as in the Euclidean example, scored with the Pearson correlation.
print("Similarity (using Pearson correlation) between Lisa Rose and Gene Seymour: ",
    sim_pearson(critics,'Lisa Rose','Gene Seymour'))



Similarity (using Pearson correlation) between Lisa Rose and Gene Seymour:  0.39605901719066977


In [24]:
##############################################################################
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the n entries of prefs most similar to person.

    Every other key of prefs is scored against person with similarity
    (called as similarity(prefs, person, other)). The result is a list of
    (score, other) tuples ordered from the highest score to the lowest,
    cut off after n entries.
    """
    scored = []
    for other in prefs:
        # Never compare person with itself.
        if other != person:
            scored.append((similarity(prefs, person, other), other))
    # Ascending sort, flipped so the best match comes first.
    return sorted(scored)[::-1][:n]

# Only the three closest critics are requested (n=3), so the label says top-3.
print("\nSimilar (top-3) person like Toby: ", topMatches(critics,'Toby',n=3))




Similar (top-5) person like Toby:  [(0.9912407071619299, 'Lisa Rose'), (0.9244734516419049, 'Mick LaSalle'), (0.8934051474415647, 'Claudia Puig')]


In [25]:
def getRecommendations(prefs, person, similarity=sim_pearson):
    """Recommend items to person from everyone else's ratings.

    Each other entry of prefs is weighted by similarity(prefs, person,
    other); entries with a score of zero or less are ignored. For every
    item person has not rated (or has rated 0), the prediction is the
    similarity-weighted average of the ratings given by the others.

    Returns a list of (predicted_rating, item) tuples ordered from the
    highest prediction to the lowest.
    """
    weighted_totals = {}
    weight_sums = {}
    for other in prefs:
        # Skip the person we are recommending for.
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # Non-positive similarity carries no useful signal here.
        if sim <= 0:
            continue
        for item, rating in prefs[other].items():
            # Leave out anything person already has a non-zero rating for.
            if item in prefs[person] and prefs[person][item] != 0:
                continue
            weighted_totals[item] = weighted_totals.get(item, 0) + rating * sim
            weight_sums[item] = weight_sums.get(item, 0) + sim

    # Normalise by the total weight, then order best-first.
    ranked = sorted(
        (total / weight_sums[item], item)
        for item, total in weighted_totals.items()
    )
    return ranked[::-1]

# Predicted ratings for the films Toby has not rated, using the default Pearson similarity.
print("Gets recommendations for Toby:", getRecommendations(critics,'Toby'))



Gets recommendations for Toby: [(3.3477895267131017, 'The Night Listener'), (2.8325499182641614, 'Lady in the Water'), (2.530980703765565, 'Just My Luck')]


In [29]:
##############################################################################
def transformPrefs(prefs):
    """Return prefs with its two key levels swapped.

    A mapping {person: {item: rating}} becomes {item: {person: rating}},
    so the similarity functions can compare items instead of people.
    The input dictionary is not modified.
    """
    flipped = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            # Create the per-item dict on first sight, then file the rating.
            flipped.setdefault(item, {})[person] = rating
    return flipped


# Flip critic -> film ratings into film -> critic ratings, so that
# topMatches compares films with each other instead of critics.
movies= transformPrefs(critics)
print("Set of movies most similar to Superman Returns\n")
print("TopMatches:" ,topMatches(movies,'Superman Returns'))



Set of movies most similar to Superman Returns

TopMatches: [(0.6579516949597695, 'You, Me and Dupree'), (0.4879500364742689, 'Lady in the Water'), (0.11180339887498941, 'Snakes on a Plane'), (-0.1798471947990544, 'The Night Listener'), (-0.42289003161103106, 'Just My Luck')]


In [30]:
# On the flipped dictionary the "items" are critics, so this ranks the
# critics who have not rated 'Just My Luck'.
print("Gets recommendations for whom to invite a premier of a movie",
      " which is similar to Just My Luck", getRecommendations(movies,'Just My Luck'))



Gets recommendations for whom to invite a premier of a movie  which is similar to Just My Luck [(4.0, 'Michael Phillips'), (3.0, 'Jack Matthews')]


In [7]:
# Item-based collaborative filtering
def calculateSimilarItems(prefs, n=10):
    """Build a table of the items most similar to each item.

    prefs is a person-centric mapping {person: {item: rating}}. It is
    flipped to be item-centric, and every item is then compared with all
    the others using sim_distance.

    Returns a dict mapping each item to the list of up to n
    (similarity, other_item) tuples produced by topMatches.
    """
    item_prefs = transformPrefs(prefs)
    item_count = len(item_prefs)
    similar = {}
    for position, item in enumerate(item_prefs, start=1):
        # Progress report every 100 items, useful on large datasets.
        if position % 100 == 0:
            print("%d / %d" % (position, item_count))
        similar[item] = topMatches(item_prefs, item, n=n, similarity=sim_distance)
    return similar

# Build the item-to-item similarity table once; it is passed to
# getRecommendedItems in a later cell.
itemsim = calculateSimilarItems(critics) # precomputation will be used
print("\nItem-based collaborative filtering (precomputation):" , itemsim)




Item-based collaborative filtering (precomputation): {'Lady in the Water': [(0.4, 'You, Me and Dupree'), (0.2857142857142857, 'The Night Listener'), (0.2222222222222222, 'Snakes on a Plane'), (0.2222222222222222, 'Just My Luck'), (0.09090909090909091, 'Superman Returns')], 'Snakes on a Plane': [(0.2222222222222222, 'Lady in the Water'), (0.18181818181818182, 'The Night Listener'), (0.16666666666666666, 'Superman Returns'), (0.10526315789473684, 'Just My Luck'), (0.05128205128205128, 'You, Me and Dupree')], 'Just My Luck': [(0.2222222222222222, 'Lady in the Water'), (0.18181818181818182, 'You, Me and Dupree'), (0.15384615384615385, 'The Night Listener'), (0.10526315789473684, 'Snakes on a Plane'), (0.06451612903225806, 'Superman Returns')], 'Superman Returns': [(0.16666666666666666, 'Snakes on a Plane'), (0.10256410256410256, 'The Night Listener'), (0.09090909090909091, 'Lady in the Water'), (0.06451612903225806, 'Just My Luck'), (0.05333333333333334, 'You, Me and Dupree')], 'You, Me a

In [8]:
##############################################################################
def getRecommendedItems(prefs, itemMatch, user):
    """Rank items the user has not rated, using precomputed item similarities.

    Args:
        prefs: dict mapping each user to a dict of {item: rating}.
        itemMatch: dict mapping each item to a list of
            (similarity, other_item) tuples, e.g. the table returned by
            calculateSimilarItems.
        user: key of prefs identifying whom to recommend for.

    Returns:
        A list of (predicted_rating, item) tuples ordered from the highest
        prediction to the lowest. Each prediction is the similarity-weighted
        average of the user's own ratings. Items whose similarities add up
        to zero are left out, because no weighted average exists for them.

    Raises:
        KeyError: if user is not in prefs, or an item the user rated has
            no entry in itemMatch.
    """
    userRatings = prefs[user]
    scores = {}
    totalSim = {}
    for item, rating in userRatings.items():
        for similarity, item2 in itemMatch[item]:
            # Only predict for items this user has not rated yet.
            if item2 in userRatings:
                continue
            # Weighted sum of the user's rating times the item similarity.
            scores[item2] = scores.get(item2, 0) + similarity * rating
            # Running total of the weights, used to normalise below.
            totalSim[item2] = totalSim.get(item2, 0) + similarity

    # A zero weight total (e.g. only 0.0 similarities) would make the
    # division below raise ZeroDivisionError, so such items are skipped.
    rankings = [
        (score / totalSim[item], item)
        for item, score in scores.items()
        if totalSim[item] != 0
    ]
    rankings.sort(reverse=True)
    return rankings

# Item-based recommendations for Toby, reusing the precomputed itemsim table.
print("\nRecommendation via (precomputation) for Toby:" ,getRecommendedItems(critics,itemsim,'Toby'))




Recommendation via (precomputation) for Toby: [(3.182634730538922, 'The Night Listener'), (2.5983318700614575, 'Just My Luck'), (2.4730878186968837, 'Lady in the Water')]


In [11]:
##############################################################################
### u.item
# 1|Toy Story (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0
# 2|GoldenEye (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?GoldenEye%20(1995)|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0

# Each line has a user ID, a movie ID, the rating given to the movie by the user, and a timestamp.
### u.data
# 196 242 3 881250949
# 186 302 3 891717742

def loadMovieLens(path='data'):
    """Load MovieLens ratings from path/u.item and path/u.data.

    Args:
        path: directory holding the '|'-separated u.item file (movie id,
            title, ...) and the tab-separated u.data file (user id,
            movie id, rating, timestamp). Both are read as latin-1.

    Returns:
        A dict mapping each user id (str) to a dict of
        {movie title: rating}, with ratings converted to float.

    Raises:
        OSError: if either file cannot be opened.
        KeyError: if u.data refers to a movie id that u.item lacks.
        ValueError: if a non-blank line lacks the expected fields.
    """
    # Movie id -> title; only the first two '|' fields are needed.
    titles = {}
    with open(path + '/u.item', encoding='latin-1') as item_file:
        for line in item_file:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file
            movie_id, title = line.split('|')[0:2]
            titles[movie_id] = title

    # User id -> {title: rating}; the timestamp column is not used.
    prefs = {}
    with open(path + '/u.data', encoding='latin-1') as data_file:
        for line in data_file:
            if not line.strip():
                continue
            user, movie_id, rating, _timestamp = line.rstrip('\n').split('\t')
            prefs.setdefault(user, {})[titles[movie_id]] = float(rating)
    return prefs

# Load the ratings from the default 'data' directory and show the
# title -> rating dictionary stored under user id '85'.
prefs = loadMovieLens()
print("\n85th critic from Movielens Dataset", prefs['85'])

# t = {1: 4, 3: 0}
# t.setdefault(5,{}) ## dictionary within a dictionary
# t[5][1] = 1; t[5][2] = 2 ##  {1: 4, 3: 0, 5: {1: 1, 2: 2}}


## itemsim = calculateSimilarItems(prefs,n=50)
## print("\nItem-based recommendations for \n\tMovielens Dataset",getRecommendedItems(prefs,itemsim,'87')[0:30])




85th critic from Movielens Dataset {'To Kill a Mockingbird (1962)': 3.0, 'Streetcar Named Desire, A (1951)': 4.0, 'George of the Jungle (1997)': 2.0, 'Beauty and the Beast (1991)': 3.0, 'Legends of the Fall (1994)': 2.0, 'Koyaanisqatsi (1983)': 3.0, 'Star Trek: The Wrath of Khan (1982)': 3.0, 'Grifters, The (1990)': 4.0, 'Heathers (1989)': 3.0, 'Birdcage, The (1996)': 2.0, 'Time to Kill, A (1996)': 3.0, 'Godfather: Part II, The (1974)': 5.0, 'Mighty Aphrodite (1995)': 3.0, 'It Happened One Night (1934)': 4.0, 'Much Ado About Nothing (1993)': 4.0, 'Face/Off (1997)': 4.0, 'Dumbo (1941)': 3.0, 'Restoration (1995)': 2.0, 'Ran (1985)': 4.0, 'Good Will Hunting (1997)': 4.0, 'Brazil (1985)': 4.0, 'Gone with the Wind (1939)': 4.0, 'Silence of the Lambs, The (1991)': 4.0, 'Victor/Victoria (1982)': 3.0, 'Quiz Show (1994)': 4.0, 'Manhattan (1979)': 5.0, 'Fried Green Tomatoes (1991)': 4.0, 'Full Monty, The (1997)': 3.0, 'Natural Born Killers (1994)': 3.0, 'Dances with Wolves (1990)': 2.0, 'Remain

In [10]:

########################################################################
# Small hand-made ratings dictionary used to try the functions defined
# earlier in this notebook on a second data set.
DB = {
    "uzay" : {"Star Wars": 4.5, "Superman": 3.5,"Batman": 4,},
    "selin" : {"Venedik": 4.5, "Paris": 3.5,"Batman": 4,},
    "fatih" : {"Superman": 4,"Batman": 4},
}
print("\nUzaycritics: ", DB['uzay'])
print("Uzay critics for the film Star Wars: ", DB['uzay']['Star Wars'])
# uzay and fatih share Superman and Batman; selin and fatih share only
# Batman, which both rated 4, hence the similarity of 1.0.
print("\nSimilarity (using Euclidean distance)between uzay and fatih: ", sim_distance(DB,'uzay','fatih'))
print("\nSimilarity (using Euclidean distance)between selin and fatih: ", sim_distance(DB,'selin','fatih'))


# Rank the other two users against fatih, then score the items fatih has
# not rated, both with the distance-based similarity.
print("\nSimilar (top-5) person like fatih: ", topMatches(DB,'fatih',similarity=sim_distance));
print("Gets recommendations for fatih:", getRecommendations(DB,'fatih',similarity=sim_distance))





Uzaycritics:  {'Star Wars': 4.5, 'Superman': 3.5, 'Batman': 4}
Uzay critics for the film Star Wars:  4.5

Similarity (using Euclidean distance)between uzay and fatih:  0.8

Similarity (using Euclidean distance)between selin and fatih:  1.0

Similar (top-5) person like fatih:  [(1.0, 'selin'), (0.8, 'uzay')]
Gets recommendations for fatih: [(4.5, 'Venedik'), (4.5, 'Star Wars'), (3.5, 'Paris')]
