# Assignment 2: Code

# Assignment 2: First Python Program

"""
Created on Sat Dec 10 2016

@author: Susan P

"""

import pandas


# Load the CSV into a DataFrame. Per the pandas documentation,
# low_memory=False stops read_csv from processing the file in internal chunks,
# which avoids mixed-type inference within a column.
data = pandas.read_csv('natality-JAN2003-VA.csv', low_memory=False)

# Optional sanity checks (left disabled):
# print (len(data)) #number of observations (rows)
# print (len(data.columns)) # number of variables (columns)

# Columns used for the frequency distributions below:
#     APGAR_5MIN, CIGARETTE_USE, ALCOHOL_USE

# Upper-case all DataFrame column names so the lookups below do not depend on
# the capitalisation used in the CSV header.
# A concrete list is assigned (rather than a lazy map() iterator, which is a
# one-shot object under Python 3 and is not accepted as an index by every
# pandas version); calling .upper() on each label also works for any
# string-like label type.
data.columns = [name.upper() for name in data.columns]

# ---------------------------------------------------------------------------
# APGAR_5MIN - frequency distributions
# ---------------------------------------------------------------------------

# Set the APGAR variable to numeric; entries that cannot be parsed become NaN.
# Series.convert_objects(convert_numeric=True) was deprecated in pandas 0.17
# and removed in pandas 1.0, so pandas.to_numeric(errors='coerce') is used as
# its documented replacement.
data['APGAR_5MIN'] = pandas.to_numeric(data['APGAR_5MIN'], errors='coerce')

print('Apgar scores by COUNT')
print('---------------------')
# sort=False: do not order the result by frequency.
c1 = data['APGAR_5MIN'].value_counts(sort=False)
print(c1)
print(' ')  # blank line print

print('Apgar scores by PERCENTAGE')
print('--------------------------')
# Percentage frequency distribution via groupby: size of each group divided by
# the total number of rows. Rows whose APGAR_5MIN is NaN are not placed in any
# group by default but are still counted in len(data), so the percentages may
# sum to less than 100.
p1 = data.groupby('APGAR_5MIN').size() * 100 / len(data)
print(p1)
print(' ')  # blank line print

# ---------------------------------------------------------------------------
# CIGARETTE_USE - frequency distributions
# ---------------------------------------------------------------------------

# Count of rows for each distinct value, not ordered by frequency.
print('Cigarette use by COUNT (True/False)')
print('-----------------------------------')
cigarette_use = data['CIGARETTE_USE']
c2 = cigarette_use.value_counts(sort=False)
print(c2)
print(' ')  # spacer line

# Group sizes expressed as a percentage of the total number of rows.
print('Cigarette use by PERCENTAGE (True/False)')
print('----------------------------------------')
cigarette_group_sizes = data.groupby('CIGARETTE_USE').size()
p2 = cigarette_group_sizes * 100 / len(data)
print(p2)
print(' ')  # spacer line

# ---------------------------------------------------------------------------
# ALCOHOL_USE - frequency distributions
# ---------------------------------------------------------------------------

# Count of rows for each distinct value, not ordered by frequency.
print('Alcohol use by COUNT (True/False)')
print('---------------------------------')
alcohol_use = data['ALCOHOL_USE']
c3 = alcohol_use.value_counts(sort=False)
print(c3)
print(' ')  # spacer line

# Group sizes expressed as a percentage of the total number of rows.
print('Alcohol use by PERCENTAGE (True/False)')
print('--------------------------------------')
alcohol_group_sizes = data.groupby('ALCOHOL_USE').size()
p3 = alcohol_group_sizes * 100 / len(data)
print(p3)
print(' ')  # spacer line