# NumPy Top Functions Used in Data Analysis

# Top Functions

# %%
import numpy as np
import pandas as pd


# %%
# Raw ingredients reused by the Series examples below.
labels = ["a", "b", "c"]
aList = [1, 2, 3]
array = np.array(aList)
d = {"a": 1, "b": 2, "c": 3}

# A Series is a one-dimensional array of values with a label attached to each
# one. No labels are supplied here, so pandas numbers the entries 0, 1, 2.
series = pd.Series(data=aList)
print(series)


# %%
# Same values, but this time each one gets an explicit label.
seriesLabel = pd.Series(data=aList, index=labels)
print(seriesLabel)


# %%
# Built from a dict: the keys become the labels, the values become the data.
seriesWithDict = pd.Series(data=d)
print(seriesWithDict)


# %%
# Build a Series whose index labels are region names.
series1 = pd.Series(data=[1, 2, 3], index=["Asia", "Europe", "NorthAmerica"])
print(series1)


# %%
# Look up a single value by its index label.
series1["NorthAmerica"]


# %%
# Series can be added together; values are matched up by label, not by position.
series2 = pd.Series(
    data=[1, 2, 3, 4],
    index=["Asia", "Europe", "NorthAmerica", "NorthUmbria"],
)
newSeries3 = series1 + series2
print(newSeries3)
# "NorthUmbria" exists only in series2, so there is nothing to add it to and it
# comes out as NaN in the result (which is why the printed values are floats).


# %%
# A DataFrame is a two-dimensional, labelled table of data.
from numpy.random import randn

# Think of it as a group of Series that all share one index: the rows are
# labelled a-e, and each of the columns x, y and z is a Series over those rows.
dataframe = pd.DataFrame(
    data=randn(5, 3),
    index=list("abcde"),
    columns=list("xyz"),
)
print(dataframe)


# %%
# Selecting from DataFrames: indexing with a single column label returns that column.
dataframe["y"]


# %%
type(dataframe['x']) # A single selected column is a pandas Series


# %%
dataframe[['x','y']] # A list of labels selects several columns at once (the result is a DataFrame)


# %%
# Create a new column by assigning to a label that does not exist yet.
# The assigned values must line up with the rows (axis=0): a sequence whose
# length differs from the number of rows raises an error.
dataframe['newSeries'] = range(0,5)


# %%
# Print the column that was just added.
print(dataframe['newSeries'])


# %%
# As the last expression in a cell, this displays the column in an interactive session.
dataframe['newSeries']


# %%
# Drop a column (axis=1). By default drop() returns a new DataFrame and leaves the
# original untouched; the result is discarded here, so nothing is actually removed.
# Pass inplace=True (or reassign the result) to remove the column for real.
dataframe.drop('newSeries',axis=1)
print(dataframe) # 'newSeries' is still present because the drop above was not in place


# %%
# Drop a labelled row (axis=0). This also returns a copy; `dataframe` itself keeps row 'a'.
dataframe.drop('a',axis=0)


# %%
# (rows, columns) of the frame; not affected by the non-in-place drops above.
dataframe.shape


# %%
# Rows are Series too. .loc selects a row by its index label.
dataframe.loc['a']


# %%
# .iloc selects a row by integer position instead of by label (0 is the first row).
dataframe.iloc[0]


# %%
# Pass a list of labels to pick several rows; they come back in the order listed.
dataframe.loc[['d','a']]


# %%
# Label slices work with .loc and, unlike positional slices, include BOTH endpoints.
dataframe.loc['a':'e']


# %%
# Slice rows and columns together: rows 'a' through 'b', columns up to and including 'z'.
dataframe.loc['a':'b',:'z']


# %%