I am using Kaggle dataset "covid19-corona-virus-india-dataset/complete.csv"
for my analysis.
We will first find the five states with the highest number of cases,
and then plot the data on a day-by-day basis.
Let's first import the relevant modules:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)
import folium
import seaborn as sns
import os
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)
import folium
import seaborn as sns
import os
In the second part, I create a dataframe and call the function
operationDfs, passing it the newly created dataframe. Note that we
use the time package to record the total execution time of the entire
program.
# Load the day-wise, state-wise figures and the per-patient records.
# (The assignment must stay on one logical line; a bare `df_complete =`
# followed by a newline is a SyntaxError.)
df_complete = pd.read_csv('../input/covid19-corona-virus-india-dataset/complete.csv')
df_patient_wise = pd.read_csv('../input/covid19-corona-virus-india-dataset/patients_data.csv')

# Date- and state-wise total of confirmed cases, one row per (Date, state).
df = (
    df_complete
    .groupby(['Date', 'Name of State / UT'])['Total Confirmed cases (Indian National)']
    .sum()
    .reset_index()
)
# Notebook-style inspection of a single state's rows.
df[df['Name of State / UT']=='Maharashtra']

# State-wise total till 29th March.
# NOTE(review): this sums the per-date values for each state. If the column
# holds cumulative counts, the sum overstates the real total and the latest
# (or max) value per state would be the right figure — confirm against the
# dataset before relying on these numbers.
df_stateWiseTot = (
    df
    .groupby(['Name of State / UT'])['Total Confirmed cases (Indian National)']
    .sum()
    .reset_index()
)
# Keep the frame itself ordered from most to fewest cases.
df_stateWiseTot.sort_values('Total Confirmed cases (Indian National)', ascending=False, inplace=True)
# Top five states by total.
df_stateWiseTot.nlargest(5, 'Total Confirmed cases (Indian National)')
#OUTPUT
Name of State / UT Total Confirmed cases (Indian National)
Maharashtra 1294
Kerala 1264
Uttar Pradesh 512
Karnataka 480
Delhi 390
# Per-patient records from the same Kaggle dataset.
df_patient_wise = pd.read_csv('../input/covid19-corona-virus-india-dataset/patients_data.csv')

# Column names used repeatedly below.
state_col = 'Name of State / UT'
cases_col = 'Total Confirmed cases (Indian National)'

# Date- and state-wise total: one row per (Date, state) pair.
df = df_complete.groupby(['Date', state_col])[cases_col].sum().reset_index()
# Notebook-style inspection of Maharashtra's rows.
df[df[state_col] == 'Maharashtra']

# State-wise total till 29th March, ordered from most to fewest cases.
df_stateWiseTot = df.groupby([state_col])[cases_col].sum().reset_index()
df_stateWiseTot.sort_values(by=cases_col, ascending=False, inplace=True)
# Five states with the largest totals.
df_stateWiseTot.nlargest(5, cases_col)
#OUTPUT
Name of State / UT Total Confirmed cases (Indian National)
Maharashtra 1294
Kerala 1264
Uttar Pradesh 512
Karnataka 480
Delhi 390
Let's plot the state-wise confirmed cases on a day-by-day basis.
# Line chart of confirmed cases per day for the five states with most cases.
fig1 = go.Figure()
for state in ['Maharashtra', 'Kerala', 'Uttar Pradesh', 'Karnataka', 'Delhi']:
    # Select the rows once so that x and y come from the same rows. Filtering
    # only x by date (as before) can leave x and y with different lengths.
    # NOTE(review): 'Date' is compared against a string literal; this is only
    # chronological if the column holds ISO-formatted (YYYY-MM-DD) values — confirm.
    state_rows = df[(df['Name of State / UT'] == state) & (df['Date'] < '2020-03-29')]
    fig1.add_trace(go.Scatter(
        x=state_rows['Date'],
        y=state_rows['Total Confirmed cases (Indian National)'],
        name=state,
    ))

# Five states are plotted, so the title says "Top 5".
fig1.layout.update(
    title_text='COVID-19 Top 5 State Wise Data in India',
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    width=1100,
    height=500,
    font=dict(size=12, color="white"),
)
# Dark theme: black plot area and surrounding paper, white text (set above).
fig1.layout.plot_bgcolor = 'Black'
fig1.layout.paper_bgcolor = 'Black'
fig1.show()
# NOTE(review): no new figure is created here, so these traces are appended to
# the existing fig1, which already holds traces for the same five states.
# Confirm whether this repeated cell is intended.
for state in ['Maharashtra', 'Kerala', 'Uttar Pradesh', 'Karnataka', 'Delhi']:
    # Select the rows once so that x and y come from the same rows. Filtering
    # only x by date (as before) can leave x and y with different lengths.
    # NOTE(review): 'Date' is compared against a string literal; this is only
    # chronological if the column holds ISO-formatted (YYYY-MM-DD) values — confirm.
    state_rows = df[(df['Name of State / UT'] == state) & (df['Date'] < '2020-03-29')]
    fig1.add_trace(go.Scatter(
        x=state_rows['Date'],
        y=state_rows['Total Confirmed cases (Indian National)'],
        name=state,
    ))

# Five states are plotted, so the title says "Top 5".
fig1.layout.update(
    title_text='COVID-19 Top 5 State Wise Data in India',
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    width=1100,
    height=500,
    font=dict(size=12, color="white"),
)
# Dark theme: black plot area and surrounding paper, white text (set above).
fig1.layout.plot_bgcolor = 'Black'
fig1.layout.paper_bgcolor = 'Black'
fig1.show()
Data Science with… Python :)
Post Reference: Vikram Aristocratic Elfin Share