# Example Streamlit app for quick exploratory data analysis (EDA) of a CSV file.
#
# Flow: upload a CSV -> optionally fill missing values -> show the number of
# missing values per column -> frequency bar plot for one categorical column
# -> scatter plot for two numeric columns.

# importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import streamlit as st


def _split_columns(frame):
    """Return ``(categorical, numeric)`` lists of column names of *frame*.

    Columns with a numeric dtype (int, float, bool) go to ``numeric``;
    every other column (object / string) goes to ``categorical``.
    """
    categorical = []
    numeric = []
    for col in frame.columns:
        if pd.api.types.is_numeric_dtype(frame[col]):
            numeric.append(col)
        else:
            categorical.append(col)
    return categorical, numeric


def _fill_missing(frame, numeric_cols):
    """Fill missing values in place and return *frame*.

    Numeric columns are filled with their mean, all other columns with their
    most frequent value (mode). Columns without any usable fill value
    (e.g. entirely empty columns) are left untouched.
    """
    numeric_cols = set(numeric_cols)
    for col in frame.columns:
        if not frame[col].isnull().any():
            continue
        if col in numeric_cols:
            fill_value = frame[col].mean()
        else:
            modes = frame[col].mode()
            if modes.empty:
                # Column is entirely null: there is no mode to fill with.
                continue
            fill_value = modes.iloc[0]
        # Assign back instead of using a chained ``inplace=True`` fill, which
        # may operate on a temporary copy in recent pandas versions.
        frame[col] = frame[col].fillna(fill_value)
    return frame


# Title and Markdown
st.title("AN EXAMPLE EDA APP")
st.markdown(''' <h3>This is an example of how to do EDA in streamlit app</h3>''', unsafe_allow_html=True)

# File upload
file_up = st.file_uploader("Upload a file", type='csv')

# Everything below needs the data, so it only runs once a file was uploaded.
if file_up is not None:
    st.success("File uploaded successfully")
    df = pd.read_csv(file_up)
    obj, int_float = _split_columns(df)

    # Fill null values: mode for categorical columns, mean for numeric ones.
    with st.form(key='my_form'):
        with st.sidebar:
            st.sidebar.header("To remove NULL values press below button")
            submit_button = st.form_submit_button(label="Remove NULL")

    # NOTE: the file is re-read on every script run, so the fill only applies
    # to the run in which the button reports a click.
    if submit_button:
        _fill_missing(df, int_float)

    # Number of null values in each column (Series indexed by column name).
    null_counts = df.isnull().sum()
    total_nulls = int(null_counts.sum())

    # With no null values only the total is shown; otherwise a bar plot with
    # the null count of each column is drawn as well.
    if total_nulls == 0:
        st.write("Total no. of NULL values: ", str(total_nulls))
    else:
        st.write("Bar plot to know the number of NULL values in each column")
        st.write("Total number of null values: ", str(total_nulls))
        fig = px.bar(
            x=null_counts.index,
            y=null_counts.values,
            labels={'x': "Column Names", 'y': "No. of Null values"},
        )
        st.plotly_chart(fig)

    # Frequency Plot
    st.sidebar.header("Select variable")
    selected = st.sidebar.selectbox('Object variables', obj)
    st.write("Bar Plot to know the frequency of each category")
    if selected is None:
        # No categorical column available, so there is nothing to count.
        st.write("No object variables found in the uploaded file")
    else:
        frequency = df[selected].value_counts()
        # Pass explicit x/y arrays: the name of the value_counts() result
        # differs between pandas versions, so it cannot be used as a y key.
        fig2 = px.bar(
            x=frequency.index,
            y=frequency.values,
            labels={'x': selected, 'y': 'count'},
        )
        st.plotly_chart(fig2)

    # Correlation chart
    st.sidebar.header("Select variable")
    selected2 = st.sidebar.multiselect("Variables", int_float)
    st.write("Scatter plot for correlation")
    if len(selected2) == 2:
        fig3 = px.scatter(df, x=selected2[0], y=selected2[1])
        st.plotly_chart(fig3)
    else:
        st.write("Select any 2 variables only")