This script scrapes the list of categories from a Netflix browse page and then, for the category the user selects, gathers its sub-categories and the movies listed under each sub-category.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
def make_soup(url, timeout=10):
    """Download *url* and parse the response body into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection, so a finite default is supplied.

    Returns:
        A ``BeautifulSoup`` object built from the response text using the
        ``'html.parser'`` parser.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` (connection failure, timeout, ...).

    Note:
        The HTTP status code is not checked; whatever body the server
        returns is parsed as-is.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
def _collect_movies(card):
    """Return ``[number, title, link]`` rows for the ``<li>`` entries of *card*.

    Entries lacking either the ``nm-collections-title-name`` span or an
    ``<a>`` tag are skipped; numbering starts at 1 and counts only the
    entries that were kept.
    """
    movie_list = []
    for movie in card.find_all('li'):
        title_tag = movie.find('span', {'class': 'nm-collections-title-name'})
        anchor = movie.find('a')
        if title_tag is None or anchor is None:
            continue
        movie_list.append(
            [len(movie_list) + 1, title_tag.text, anchor.get('href')]
        )
    return movie_list


def browseCategory(category, data):
    """Scrape the sub-categories and movie listings of one chosen category.

    Args:
        category: 1-based position of the chosen category within *data*.
        data: Sequence of category rows; the URL to scrape is read from
            index 2 of the selected row.

    Returns:
        A tuple ``(subCategories, subCategory_details, count)``:
        ``subCategories`` is the list of sub-category headings (text of each
        section's ``<h1>``), ``subCategory_details`` holds at the same
        position that heading's ``[number, title, link]`` movie rows, and
        ``count`` is the number of sub-categories collected.

    Raises:
        IndexError: if *category* is not between 1 and ``len(data)``.
    """
    # Reject 0 and negative choices explicitly: ``data[category - 1]`` would
    # otherwise wrap around and silently select a row from the end of *data*.
    if not 1 <= category <= len(data):
        raise IndexError(
            'category must be between 1 and {}, got {}'.format(
                len(data), category
            )
        )

    category_url = data[category - 1][2]
    soup = make_soup(category_url)

    subCategories = []
    subCategory_details = []
    for card in soup.find_all('section', {'class': 'nm-collections-row'}):
        heading = card.find('h1')
        if heading is None:
            # A section without a heading is not a sub-category row; skip it.
            continue
        subCategories.append(heading.text)
        subCategory_details.append(_collect_movies(card))

    return subCategories, subCategory_details, len(subCategories)
def getCategories(base_url):
    """Collect the category rows found on the page at *base_url*.

    Every ``<section class="nm-collections-row">`` on the page is inspected.
    A section contributes a row only if it contains both a
    ``nm-collections-row-name`` span and an ``<a>`` tag.

    Args:
        base_url: Address of the page listing the categories.

    Returns:
        A list of ``[number, title, url]`` rows, numbered from 1 in page
        order and counting only the sections that were kept.
    """
    page = make_soup(base_url)
    rows = []
    for section in page.find_all('section', {'class': 'nm-collections-row'}):
        try:
            name = section.find(
                'span', {'class': 'nm-collections-row-name'}
            ).text
            link = section.find('a').get('href')
        except AttributeError:
            # find() returned None: the section has no name span or no link.
            continue
        rows.append([len(rows) + 1, name, link])
    return rows
def main():
    """Interactively list the categories and print one category's movies.

    Fetches the category list from a fixed Netflix browse URL, prints it as a
    table, asks the user to pick an entry by its number, then prints one
    table of movies per sub-category of the chosen category.

    If no categories are found, or the input is not a valid number from the
    table, a message is printed and the function returns without scraping.
    """
    netflix_url = "https://www.netflix.com/in/browse/genre/839338"
    columns = ['Sr.No', 'Title', 'link']

    categories = getCategories(netflix_url)
    if not categories:
        print("No categories could be read from", netflix_url)
        return

    print("Please select one of the category")
    # Build the frame straight from the row lists: going through np.array
    # fails for an empty list because the resulting array has no columns.
    df = pd.DataFrame(categories, columns=columns)
    print(df.to_string(index=False))

    try:
        choice = int(input('\n\n Please Enter your Choice: \n'))
    except ValueError:
        print("Invalid choice: please enter a number.")
        return
    if not 1 <= choice <= len(categories):
        print("Invalid choice: enter a number between 1 and", len(categories))
        return

    subCategories, movieList, _ = browseCategory(choice, categories)
    for heading, movies in zip(subCategories, movieList):
        print(heading, '\n\n')
        # A sub-category may have no parsed movies; an empty list is fine here.
        subCategory_df = pd.DataFrame(movies, columns=columns)
        print(subCategory_df.to_string(index=False))
        print("\n\n\n")
# Start the interactive scraper only when this file is executed as a script,
# so importing the module does not trigger network requests or prompts.
if __name__ == '__main__':
    main()