Aprendizaje Supervisado

Machine Learning

import matplotlib.pylab as plt

Anatomía de las gráficas en matplotlib tomado de: https://pbpython.com/effective-matplotlib.html

Figura \ Marcadores \ Colores \ Marcadores

# Sample list of names used to demonstrate `enumerate`.
lista = ["daniela", "alex", "juan", "rocio"]

# Without unpacking, each `dato` is the whole item yielded by `enumerate`.
for dato in enumerate(lista):
  print(dato)

# Unpacking each item into the position `i` and the element `nombre`.
for i, nombre in enumerate(lista):
  print(i, " ", nombre)

Anotaciones sobre puntos en un diagrama de dispersión (scatterplot)

# Point coordinates and the numeric label attached to each point.
x = [0.15, 0.3, 0.45, 0.6, 0.75]
y = [2.56422, 3.77284, 3.52623, 3.51468, 3.02199]
n = [58, 651, 393, 203, 123]

# First figure: the bare scatter plot, without annotations.
fig, ax = plt.subplots()
ax.scatter(x, y)

# Second figure: the same scatter plot with one label written at each point.
fig, ax = plt.subplots()
ax.scatter(x, y)
for etiqueta, punto in zip(n, zip(x, y)):
    ax.annotate(etiqueta, punto)

# Extra free-standing annotation in red, placed at (0.5, 3).
ax.annotate("diplomado", (0.5, 3), c="r", size=18)

# Two data series (`y` and `x`) that share the horizontal coordinates `z`.
y = [2.56422, 3.77284, 3.52623, 3.51468, 3.02199]
x = [1.6676, 6.6234, 1.523434, 6.234234 ,5.6345]
z = [0.15, 0.3, 0.45, 0.6, 0.75]
n = [58, 651, 393, 203, 123]

fig, ax = plt.subplots()


# Each series gets its own marker and a single-letter color code.
ax.scatter(z, y, marker="v", c="b")
ax.scatter(z, x, marker=".", c="r")

# Same data again, this time showing other ways of writing a color.
y = [2.56422, 3.77284, 3.52623, 3.51468, 3.02199]
x = [1.6676, 6.6234, 1.523434, 6.234234 ,5.6345]
z = [0.15, 0.3, 0.45, 0.6, 0.75]
n = [58, 651, 393, 203, 123]

fig, ax = plt.subplots()


ax.scatter(z, y, marker="v", c="b")
# Color written as a hexadecimal string.
ax.scatter(z, x, marker=".", c="#FF00FF")
# Color written as a tuple of three floats (presumably RGB components — confirm).
ax.scatter(z, x, marker=".", c=(0.2, 0.1, 0.3))

import matplotlib.pyplot as plt
plt.figure()

# Format-string form of the call below, kept for reference:
#plt.plot([0, 1], [0, 1], 'r-')
# Color and line style passed as the explicit keywords `c` and `ls`.
plt.plot([0, 1], [0, 1], c = 'r', ls='-')
# Axis limits are wider than the plotted segment from (0, 0) to (1, 1).
plt.xlim([-2.0, 2.0])
plt.ylim([-2.0, 2.0])
plt.title('Test figure')

#plt.show()

import matplotlib.pyplot as plt
plt.figure()
# `plt.plot?` is IPython-only help syntax and a SyntaxError in plain Python;
# the built-in `help` shows the same documentation in both environments.
help(plt.plot)
# Format-string form of the call below, kept for reference:
#plt.plot([0, 1], [0, 1], 'r-.')
# Color and line style passed as the explicit keywords `c` and `linestyle`.
plt.plot([0, 1], [0, 1], c="r", linestyle='-.')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.title('Test figure')

#plt.show()

import matplotlib.pyplot as plt
plt.figure()

# Two lines drawn on the same figure, each styled with a format string.
plt.plot([0, 1], [0, 1],'r-.')
plt.plot([0, 1], [0.5, 0.5],'b--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.title('Test figure')

#plt.show()

import matplotlib.pyplot as plt

# Bar chart used to demonstrate styling of axis labels and tick labels.
fig, ax = plt.subplots(figsize=(4,4))
ax.bar(x=['A','B','C'], height=[3.1,7,4.2], color='r')
ax.set_xlabel(xlabel='X title', size=70)
ax.set_ylabel(ylabel='Y title' , color='b', size=20)
ax.tick_params(which="major", color="g")
# Style the x tick labels through `get_xticklabels()`; the `Tick.label`
# attribute used previously is deprecated and removed in recent matplotlib.
for etiqueta in ax.get_xticklabels():
    etiqueta.set_fontsize(24)
    etiqueta.set_rotation(45)
# Adjust the layout BEFORE showing: once `plt.show()` has displayed the
# figure, a later `tight_layout()` no longer affects what was displayed.
plt.tight_layout()
plt.show()

# Line plot whose x values are strings (names) instead of numbers.
fig, ax = plt.subplots(figsize=(3,3))
ax.plot(['Daniela','Pedro','Alex'], [4,6,3], color='r')
ax.set_xlabel('TITLE 1')
# Rotate every x tick label by 45.
for tick in ax.get_xticklabels():
    tick.set_rotation(45)
plt.show()

import numpy as np
x = np.array(range(10))
# NOTE(review): a single x array is passed together with two y arrays;
# presumably x**3 ends up drawn against its own index, which happens to
# coincide with x here — confirm against the `plt.plot` documentation.
plt.plot(x, x**2, x**3)

# Legend demonstrations. The mathtext labels are raw strings so that the
# backslashes in `\mathbb` and `\gamma` are not parsed as (invalid) Python
# escape sequences, and the first label reads x^2 to match the x**2 curve.

# Legend built from a list of labels, in the order the curves were drawn.
import numpy as np
x = np.array(range(10))
plt.plot(x, x**2)
plt.plot(x, x**3)
plt.legend([r"$x^2 \mathbb{R} \gamma$", "$x^3$"])

# Same plot through the Axes interface, plus two text annotations.
import numpy as np
fig, ax = plt.subplots()
x = np.array(range(10))
ax.plot(x, x**2)
ax.plot(x, x**3)
ax.legend([r"$x^2 \mathbb{R} \gamma$", "$x^3$"])
ax.annotate("colombia", (6.5, 700), c="r", size=18)
ax.annotate("peru", (8.5, 80), c="r", size=18)

# Legend position given as a named location.
import numpy as np
x = np.array(range(10))
plt.plot(x, x**2)
plt.plot(x, x**3)
plt.legend([r"$x^2 \mathbb{R} \gamma$", "$x^3$"], loc="lower left")

# Legend position given as a coordinate pair.
import numpy as np
x = np.array(range(10))
plt.plot(x, x**2)
plt.plot(x, x**3)
plt.legend([r"$x^2 \mathbb{R} \gamma$", "$x^3$"], loc=(0.2, 0.4))

# Figure with explicit size and dpi, plus a second axes added by hand.
import numpy as np
fig, ax = plt.subplots(1,1, figsize=[3,5], dpi=100)
x = np.array(range(10))

plt.plot(x, x**2)
plt.plot(x, x**3)
plt.legend([r"$x^2 \mathbb{R} \gamma$", "$x^3$"])

# Extra axes placed with an explicit four-number rectangle.
axes = fig.add_axes([1, 0, 1.3, 1])

axes.plot(x, x**2)
axes.plot(x, x**3)
axes.legend([r"$x^2 \mathbb{R} \gamma$", "$x^3$"])
plt.show()

# Labels attached to each curve; `legend()` is then called without arguments.
import numpy as np
x = np.array(range(10))
plt.plot(x, x**2, label=r"$x^2 \mathbb{R} \gamma$")
plt.plot(x, x**3, label="$x^3$")
plt.legend()

# Bar chart of the same x**2 values.
plt.bar(x, x**2)

# Histogram of 100000 samples drawn from a normal distribution.
mu, sigma = 0, 0.1 # mean and standard deviation
s = np.random.normal(mu, sigma, 100000)

plt.hist(s)
#plt.hist(s, density=True)

# Smaller sample with four values around 0.9 appended as outliers.
mu, sigma = 0, 0.1 # mean and standard deviation
s = np.random.normal(mu, sigma, 1000)

outliers = np.array([0.90, 0.92,0.91,0.904])

s = np.concatenate((s, outliers))
plt.hist(s)

# Same construction, shown as a box plot next to a histogram.
mu, sigma = 0, 0.1 # mean and standard deviation
s = np.random.normal(mu, sigma, 1000)

outliers = np.array([0.90, 0.92,0.91,0.904])

s = np.concatenate((s, outliers))

fig, ax = plt.subplots(1,2, figsize=[10,5])

ax[0].boxplot(s)
ax[1].hist(s)


# The same two-panel figure drawn a second time from the same `s`.
fig, ax = plt.subplots(1,2, figsize=[10,5])

ax[0].boxplot(s)
ax[1].hist(s)

# Density histogram with a Gaussian KDE curve drawn on top of it.
from scipy import stats
mu, sigma = 0, 0.1 # mean and standard deviation
s = np.random.normal(mu, sigma, 10000)
plt.hist(s, density=True, bins = 50)

# Evaluate the KDE on a grid of 10000 points between -1 and 1.
xx = np.linspace(-1, 1, 10000)
kde = stats.gaussian_kde(s)
plt.plot(xx, kde(xx))

# KDE of integer-valued data, with a fixed seed for the random generator.
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(41)

N = 100
x = np.random.randint(0, 9, N)
bins = np.arange(10)

kde = stats.gaussian_kde(x)
xx = np.linspace(0, 9, 1000)
fig, ax = plt.subplots(figsize=(8,6))
# Semi-transparent density histogram using the bin edges 0..9.
ax.hist(x, density=True, bins=bins, alpha=0.3)
# Kernel density estimate evaluated on the grid `xx`.
ax.plot(xx, kde(xx))
# Kernel density estimation

# Libraries and dataset.
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')

# Set the seaborn style.
sns.set_style("white")

# Basic 2D density plot.
sns.kdeplot(x=df.sepal_width, y=df.sepal_length)
plt.show()

# Custom colormap, filled contours and bandwidth adjustment.
# `fill=True` replaces the deprecated `shade=True` keyword.
figura = sns.kdeplot(x=df.sepal_width, y=df.sepal_length, cmap="Reds", fill=True, bw_adjust=.5)
# Inspect the returned object to see which methods it offers.
print(type(figura))
print(dir(figura))
figura.set_title("titulo")
#plt.title("titulo")
plt.show()

# Add the `thresh` parameter.
sns.kdeplot(x=df.sepal_width, y=df.sepal_length, cmap="Blues", fill=True, thresh=0)
plt.show()

# Library and dataset.
import seaborn as sns
import matplotlib.pyplot as plt
df = sns.load_dataset('iris')

# Customize the inner plot; the options are "scatter" | "reg" | "resid" | "kde" | "hex".
sns.jointplot(x=df["sepal_length"], y=df["sepal_width"], kind='scatter')
# Scatter with "+" markers and unfilled marginal plots with 25 bins.
sns.jointplot(x=df["sepal_length"], y=df["sepal_width"], kind='scatter', marker="+", s=100, marginal_kws=dict(bins=25, fill=False),)
# Explicit height and ratio, with ticks enabled on the marginal plots.
sns.jointplot(x=df["sepal_length"], y=df["sepal_width"], kind='scatter', height=5, ratio=2, marginal_ticks=True)
sns.jointplot(x=df["sepal_length"], y=df["sepal_width"], kind='hex')
# Keep the returned object of the KDE joint plot so more layers can be added to it.
g = sns.jointplot(x=df["sepal_length"], y=df["sepal_width"], kind='kde')

# Extra red KDE layer on the joint plot and red rug plots on the marginals.
g.plot_joint(sns.kdeplot, color="r", zorder=0, levels=6)
g.plot_marginals(sns.rugplot, color="r", height=-.15, clip_on=False)

plt.show()

5.1. Método `heatmap`

Para que un mapa de calor funcione correctamente, los datos ya deben estar en forma de matriz. La función `heatmap` se encarga de asignar un color a cada celda según su valor. Por ejemplo:

# pandas is needed for `pd.crosstab` below; import it here so this cell
# does not depend on an import that only appears later in the file.
import pandas as pd
import seaborn as sns
tips = sns.load_dataset('tips') # Returns a pandas DataFrame.

tips.head()

# Cross-tabulation of the "time" column (rows) against the "sex" column (columns).
tabla = pd.crosstab(index = tips["time"], columns = tips["sex"])
tabla

# The table is already a matrix, so it can be passed directly to `heatmap`.
sns.heatmap(tabla)

Recordemos que la función `corr` de pandas retorna una matriz con los valores de correlación entre pares de columnas numéricas. De esta manera, los índices horizontales y verticales del DataFrame retornado son los nombres de las columnas con valores numéricos del DataFrame original, y los valores de la matriz retornada son los coeficientes de correlación entre los correspondientes pares de índices que se intersecan:

# Correlation matrix for pairs of numeric columns. `numeric_only=True` is
# explicit because `tips` also has non-numeric columns (e.g. "time", "sex"),
# which recent pandas versions no longer drop silently.
tips.corr(numeric_only=True)

from scipy.stats import kendalltau, pearsonr, spearmanr
# `pearsonr?` is IPython-only help syntax; `help` also works in plain Python.
help(pearsonr)
# Get the p-values: use element [1] of the `pearsonr` result as the pairwise
# "correlation" function. `numeric_only=True` keeps the non-numeric columns
# of `tips` out of the computation.
pval = tips.corr(method=lambda x, y: pearsonr(x, y)[1], numeric_only=True)
# Subtract the identity matrix from the result (presumably to turn the 1.0
# that `corr` places on the diagonal into 0 — confirm).
pval = pval - np.eye(pval.shape[0])

Ahora, usamos la función heatmap para generar el mapa de calor de la matriz de correlación retornada por corr.

# Cast to object first so a text annotation can sit next to the numeric
# p-values; assigning a string into a float column is deprecated in pandas.
pval = pval.astype(object)
pval.iloc[0,0] = "Nohaycorrelación"
pval

# Colors come from the correlation matrix, cell texts come from `pval`;
# the empty `fmt` leaves each annotation unformatted.
sns.heatmap(tips.corr(numeric_only=True), annot = pval, fmt="");

Para este caso, el diagrama permite identificar mediante tonos de colores los coeficientes de correlación. Como es de suponer, la diagonal representa la intersección de una columna consigo misma por lo que su valor de correlación es igual a $1.0$.

Es posible incluir en la matriz el valor de cada coeficiente de correlación asignando `True` al parámetro `annot`:

# `numeric_only=True` restricts the correlation to the numeric columns of
# `tips`; recent pandas versions raise on the non-numeric ones otherwise.
sns.heatmap(tips.corr(numeric_only=True),
            annot=True,       # Show the text for each cell.
            fmt=r'.2f',       # Format of the cell text (2 decimal digits).
            cmap='coolwarm'); # Colormap used for the visualization.

import pandas as pd
import matplotlib.pyplot as plt

datos = pd.read_csv("casasboston.csv")

# Descriptive Spanish names for the original column codes. Entries whose
# column is not part of the selection below are simply ignored by `rename`.
nombres_columnas = {
    "TOWN": "CIUDAD",
    "CRIM": "INDICE_CRIMEN",
    "INDUS": "PCT_ZONA_INDUSTRIAL",
    "CHAS": "RIO_CHARLES",
    "RM": "N_HABITACIONES_MEDIO",
    "MEDV": "VALOR_MEDIANO",
    "LSTAT": "PCT_CLASE_BAJA",
}

# Select the columns of interest and rename them in a single step. Before,
# the selection was stored in `df` and then immediately overwritten by
# `datos.rename(...)`, so it had no effect.
# (Add "INDUS" and "LSTAT" to the list to keep those columns as well.)
df = datos[["RM", "CRIM", "MEDV", "TOWN", "CHAS"]].rename(columns=nombres_columnas)

print(df.sample(5))

# Mean of VALOR_MEDIANO per CIUDAD; the first 10 entries as horizontal bars.
valor_por_ciudad = df.groupby("CIUDAD")["VALOR_MEDIANO"].mean()
valor_por_ciudad.head(10).plot.barh()
plt.show()

# Bin VALOR_MEDIANO into 5 quantile-based groups (result only displayed here).
pd.qcut(df.VALOR_MEDIANO, 5)

# Store the bins as a new column and draw INDICE_CRIMEN box plots per bin.
df["VALOR_CUANTILES"] = pd.qcut(df.VALOR_MEDIANO, 5)
df.boxplot(column="INDICE_CRIMEN", by="VALOR_CUANTILES",
	figsize=(8,6))
plt.show()

df["VALOR_CUANTILES"]

# Pie chart of how often each RIO_CHARLES value occurs.
df.RIO_CHARLES.value_counts().plot.pie()
plt.show()

# Semi-transparent scatter plot of INDICE_CRIMEN against VALOR_MEDIANO.
df.plot.scatter(x="INDICE_CRIMEN", y="VALOR_MEDIANO", alpha=0.2)
plt.show()

# Histogram of INDICE_CRIMEN with 100 bins, x axis limited to the range 0-20.
df.INDICE_CRIMEN.plot.hist(bins=100, xlim=(0,20))
plt.show()

# Histogram of N_HABITACIONES_MEDIO with the default settings.
df.N_HABITACIONES_MEDIO.plot.hist()
plt.show()

# Library and dataset.
import seaborn as sns
df = sns.load_dataset('iris')
import matplotlib.pyplot as plt

# Basic correlogram of the iris DataFrame.
sns.pairplot(df)

# Load the Pokemon dataset, using the first CSV column as the index.
df = pd.read_csv('Pokemon.csv', index_col = 0)

df.head()

# Attack vs. Defense scatter plot with the default regression fit.
sns.lmplot(x="Attack", y="Defense", data=df)

# Same plot without the regression fit, colored by the "Stage" column.
sns.lmplot(x="Attack", y="Defense", data=df, fit_reg = False, hue="Stage")

# Same plot again, with both axes starting at 0.
sns.lmplot(x="Attack", y="Defense", data=df, fit_reg = False, hue="Stage")
plt.ylim(0, None)
plt.xlim(0, None)

sns.boxplot(data=df)

# Drop the "Total" COLUMN. Without `columns=`, `drop` looks for a row
# labelled "Total" in the index and fails with a KeyError.
stats_df = df.drop(columns=["Total"])

df.head()

# Select the column before aggregating: averaging the whole frame first
# would also try to average the text columns, which recent pandas rejects.
media_por_tipo_1_total = df.groupby("Type 1")["Total"].mean()
media_por_tipo_1_Attack = df.groupby("Type 1")["Attack"].mean()
media_por_tipo_1_total - media_por_tipo_1_Attack

# Same difference written as a single expression.
df.groupby("Type 1")["Total"].mean() - df.groupby("Type 1")["Attack"].mean()

for

Pepe

# Names of the explanatory variable and of the dependent (response) variable.
variable = "edad"
variable_dependiente = "tension"

# Formula built with an f-string. In "response ~ predictor" formula notation
# the dependent variable goes on the LEFT of "~"; it was on the right before.
formula = f"{variable_dependiente} ~ {variable}"
formula

# The same formula built with plain string concatenation.
variable = "edad"
variable_dependiente = "tension"

formula = variable_dependiente + " ~ " + variable
formula

# Show the column summary before any conversion.
datos.info()

# Convert the "LON" column to strings.
datos["LON"] = datos["LON"].astype(str)

# Show the summary again to compare with the first one.
datos.info()

# Convert the same column to the "category" dtype.
datos["LON"] = datos["LON"].astype("category")

# Check the Python type of one element of the column.
type(datos["LON"][0])

# Empty figure to which axes are added by hand.
fig1  = plt.figure()

# Main axes and a second, smaller axes, each given as a four-number rectangle.
axmajor = fig1.add_axes([0, 0, 2, 2])
fig1.add_axes([1, 1, 0.5, 0.5])

# `set_xlim` / `set_ylim` are Axes methods; as bare names they raise a
# NameError. They are applied to `axmajor`, the only axes kept in a variable.
axmajor.set_xlim(0,20)
axmajor.set_ylim(0,400)

Títulos

Subtítulos

Subsubtítulos

listas
listas

código

link text

List item
List item

https://www.markdownguide.org/basic-syntax/

# With a 2x2 grid, `ax` is an array of axes rather than a single one.
fig, ax = plt.subplots(2,2)

# Inspect the shape of that array.
ax.shape

# Index by row and column to reach one axes, and turn on its grid.
fig, ax = plt.subplots(2,2)
ax[0][0].grid(True)

Escala logarítmica

import numpy as np
import matplotlib.pyplot as plt

# Data used in this material.
x = np.linspace(0, 5, 10)
y = x**2

# The three curves being compared, as (legend label, y values) pairs.
curvas = (
    ("x**2", x**2),
    ("e**x", np.exp(x)),
    ("x**5", x**5),
)

# First figure: one curve per panel, all on a linear scale.
fig, axes = plt.subplots(1, 3, figsize=(10, 4))
for eje, (etiqueta, valores) in zip(axes, curvas):
    eje.plot(x, valores, label=etiqueta)

# Second figure: all curves on a linear panel and on a logarithmic panel.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
eje_lineal, eje_log = axes

# Linear scale.
for etiqueta, valores in curvas:
    eje_lineal.plot(x, valores, label=etiqueta)
eje_lineal.set_title("Escala lineal")

# Logarithmic scale: same curves, with the y axis switched to "log".
for etiqueta, valores in curvas:
    eje_log.plot(x, valores, label=etiqueta)
eje_log.legend()
eje_log.set_yscale("log")
eje_log.set_title("Escala logarítmica (en $y$)");

Raw strings (cadenas de caracteres crudas)

"\n Esto es una cadena de carácteres cruda \t. Esta no compila los salto de liena ni los tabuladores."

# Regular string literal: "\n" and "\t" are interpreted as escape sequences.
print("\n Esto es una cadena de carácteres cruda \t. Esta no compila los salto de liena ni los tabuladores.")

# The `r` prefix makes a raw string: the backslashes are kept literally.
r"\n Esto es una cadena de carácteres cruda \t. Esta no compila los salto de linea ni los tabuladores."

# Printing the raw string shows "\n" and "\t" as written.
print(r"\n Esto es una cadena de carácteres cruda \t. Esta no compila los salto de linea ni los tabuladores.")