"""
Created on Sat Dec 10 2016
@author: Susan P
"""
import pandas
# Load the natality extract into the module-level DataFrame `data`.
# low_memory=False makes pandas infer each column's dtype from the whole
# file rather than chunk by chunk, avoiding mixed-type columns.
data = pandas.read_csv('natality-JAN2003-VA.csv', low_memory=False)
# print (len(data)) #number of observations (rows)
# print (len(data.columns)) # number of variables (columns)
"""
COLUMNS FOR FREQ. DIST'S : APGAR_5MIN, CIGARETTE_USE, ALCOHOL_USE
"""
# Upper-case all DataFrame column names so the sections below can refer to
# them as APGAR_5MIN, CIGARETTE_USE and ALCOHOL_USE. The vectorised string
# accessor returns a real Index; the previous `map(str.upper, ...)` produced
# a lazy iterator on Python 3, which not every pandas version accepts as a
# column index.
data.columns = data.columns.str.upper()
"""
APGAR_5MIN - FREQ DIST'S
"""
# Coerce the Apgar column to numeric; values that cannot be parsed become NaN.
# pandas.to_numeric(errors='coerce') is the supported replacement for
# Series.convert_objects(convert_numeric=True), which was deprecated and has
# since been removed from pandas.
data['APGAR_5MIN'] = pandas.to_numeric(data['APGAR_5MIN'], errors='coerce')

# Raw counts per distinct Apgar score (sort=False: not ordered by frequency).
print('Apgar scores by COUNT')
print('---------------------')
c1 = data['APGAR_5MIN'].value_counts(sort=False)
print(c1)
print(' ')  # blank line print

# Percentage frequency distribution using groupby: rows per score as a share
# of ALL rows. NOTE: groupby drops rows whose score is NaN while len(data)
# still counts them, so the percentages sum to less than 100 when scores are
# missing.
print('Apgar scores by PERCENTAGE')
print('--------------------------')
p1 = data.groupby('APGAR_5MIN').size() * 100 / len(data)
print(p1)
print(' ')  # blank line print
"""
CIGARETTE_USE - FREQ DIST'S
"""
# Count table: occurrences of each distinct CIGARETTE_USE value, not ordered
# by frequency.
print('Cigarette use by COUNT (True/False)',
      '-----------------------------------',
      sep='\n')
c2 = data['CIGARETTE_USE'].value_counts(sort=False)
print(c2, ' ', sep='\n')  # table followed by a spacer line

# Percentage table: rows per CIGARETTE_USE value as a share of every row in
# the DataFrame.
print('Cigarette use by PERCENTAGE (True/False)',
      '----------------------------------------',
      sep='\n')
p2 = data.groupby('CIGARETTE_USE').size().mul(100).div(len(data))
print(p2, ' ', sep='\n')  # table followed by a spacer line
"""
ALCOHOL_USE - FREQ DIST'S
"""
# Count table: occurrences of each distinct ALCOHOL_USE value, not ordered
# by frequency.
print('Alcohol use by COUNT (True/False)',
      '---------------------------------',
      sep='\n')
c3 = data['ALCOHOL_USE'].value_counts(sort=False)
print(c3, ' ', sep='\n')  # table followed by a spacer line

# Percentage table: rows per ALCOHOL_USE value as a share of every row in
# the DataFrame.
print('Alcohol use by PERCENTAGE (True/False)',
      '--------------------------------------',
      sep='\n')
p3 = data.groupby('ALCOHOL_USE').size().mul(100).div(len(data))
print(p3, ' ', sep='\n')  # table followed by a spacer line