Friday, April 20, 2018

Text Processing in Pandas


import pandas as pd

# Load the data set, discarding rows in which every field is missing.
chicago = pd.read_csv('chicago.csv').dropna(how='all')

# --- Filtering rows on a substring -----------------------------------------
# Lower-casing first makes the match case-insensitive.  na=False treats a
# missing title as "no match", so the mask is strictly boolean and can be
# used as a row filter even if the column contains NaN.
mask1 = chicago['Position Title'].str.lower().str.contains(
    'water chemist ii', na=False
)
chicago[mask1].count()

# --- Trimming whitespace ---------------------------------------------------
chicago['Name'] = chicago['Name'].str.strip()
chicago['Position Title'] = chicago['Position Title'].str.strip()

# --- String methods on the index and on the column labels ------------------
chicago = chicago.set_index('Name')
chicago.head()
chicago.index = chicago.index.str.strip().str.title()
chicago.head()
chicago.columns
# Upper-cased copy of the column labels.  It is held in its own variable
# instead of being attached to the DataFrame as an ad-hoc attribute, and it
# is deliberately NOT assigned to `chicago.columns`: the statements below
# still look columns up by their original names (e.g. 'Position Title').
upper_columns = chicago.columns.str.upper()
upper_columns
chicago.head()

# --- Splitting strings -----------------------------------------------------
# 'Name' was moved into the index above; turn it back into a regular column
# so that chicago['Name'] works again.  rename_axis guarantees the restored
# column is called 'Name' regardless of what the index is currently named.
chicago = chicago.rename_axis('Name').reset_index()

"Hello today is friday".split()
# Frequency of the part before the comma.
chicago['Name'].str.split(',').str.get(0).str.title().value_counts()
# Frequency of the first word of each position title.
chicago['Position Title'].str.split().str.get(0).value_counts()
# Frequency of the first word of the part after the comma.
chicago['Name'].str.split(',').str.get(1).str.strip().str.title().str.split().str.get(0).value_counts()

# Split on the first comma only (n=1) so a name containing more than one
# comma still yields exactly two pieces.
# NOTE(review): names are presumably stored as "Last, First M" -- the cell
# above takes element 1 and keeps only its first word -- so element 0 is
# mapped to 'Last Name' and element 1 to 'First Name'.  Confirm against
# chicago.csv.
name_parts = chicago['Name'].str.split(',', n=1, expand=True)
chicago['First Name'] = name_parts[1].str.strip()
chicago['Last Name'] = name_parts[0].str.strip()
chicago.head(3)

# Split each title into its first word and the remainder.
chicago['Position Title'].str.split(' ', expand=True, n=1)
In [ ]: