Pandas DataFrames
dict = {"country": ["Brazil", "Russia", "India", "China", "South Africa"],
"capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
"area": [8.516, 17.10, 3.286, 9.597, 1.221],
"population": [200.4, 143.5, 1252, 1357, 52.98] }
import pandas as pd
brics = pd.DataFrame(dict)
print(brics)
Adding index to DataFrame
# Set the index for brics brics.index = ["BR", "RU", "IN", "CH", "SA"] # Print out brics with new index values print(brics)
Reading CSV by Pandas DataFrame
# Import pandas as pd
import pandas as pd
# Import the cars.csv data: cars
cars = pd.read_csv('cars.csv')
# Print out cars
print(cars)
Reading CSV file by Pandas DataFrame with 1st column as index
# Import pandas and cars.csv
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out country column as Pandas Series
print(cars['cars_per_cap'])
# Print out country column as Pandas DataFrame
print(cars[['cars_per_cap']])
# Print out DataFrame with country and drives_right columns
print(cars[['cars_per_cap', 'country']])
Print partial rows (observations) from a DataFrame
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out first 4 observations
print(cars[0:4])
# Print out fifth, sixth, and seventh observation
print(cars[4:6])
Data access by loc and iloc in Pandas DaraFrame
loc is label-based, and iloc is integer index based
# Import cars data
import pandas as pd
cars = pd.read_csv('cars.csv', index_col = 0)
# Print out observation for Japan
print(cars.iloc[2])
# Print out observations for Australia and Egypt
print(cars.loc[['AUS', 'EG']])
Random number generation
import random
def lottery():
# returns 6 numbers between 1 and 40
for i in range(6):
yield random.randint(1, 40)
# returns a 7th number between 1 and 15
yield random.randint(1,15)
for random_number in lottery():
print("And the next number is... %d!" %(random_number))
Swap variables' value
a = 1 b = 2 a, b = b, a print(a,b)
Fibonacci series generator
The first two numbers of the series is always equal to 1, and each consecutive number returned is the sum of the last two numbers - the below code uses only two variables to get the result.
def fib():
a, b = 1, 1
while 1:
yield a
a, b = b, a + b
# testing code
import types
if type(fib()) == types.GeneratorType:
print("Good, The fib function is a generator.")
counter = 0
for n in fib():
print(n)
counter += 1
if counter == 10:
break
Split string as list
sentence = "the quick brown fox jumps over the lazy dog" words = sentence.split() print(words)
Filter positive numbers only - 1
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7]
newlist = []
for number in numbers:
if number>0:
newlist.append(number)
print(newlist)
Filter positive numbers only - 2
numbers = [34.6, -203.4, 44.9, 68.3, -12.2, 44.6, 12.7] newlist = [int(x) for x in numbers if x > 0] print(newlist)
Multiple Function Argument recognition - the list of "therest" parameters
def foo(first, second, third, *therest):
print("First: %s" %(first))
print("Second: %s" %(second))
print("Third: %s" %(third))
print("And all the rest... %s" %(list(therest)))
foo(1,2,3,4,5)
Multiple Function Argument by keyword
def bar(first, second, third, **options):
if options.get("action") == "sum":
print("The sum is: %d" %(first + second + third))
if options.get("number") == "first":
return first
result = bar(1, 2, 3, action = "sum", number = "first")
print("Result: %d" %(result))