# Pakete mit Kurzbeschreibung

import numpy as np         # Numerik: Arrays, Zufall, Statistik
import pandas as pd        # Tabellen: Einlesen, Filtern, Auswerten
import matplotlib.pyplot as plt  # Plots: Histogramme, Linien, Balken
import seaborn as sns      # Statistische Visualisierung
import scipy.stats as stats  # Statistische Tests & Maße
import math                # Basis-Mathematik: sqrt, log, etc.
from pydataset import data # Zugriff auf R-Beispieldatensätze (z. B. Titanic)

# Smoke test: print a greeting to confirm that the interpreter runs.
print("Hello world")

# Two example integers, bound in a single tuple assignment.
a, b = 1, 5

Hello world

# Rebind both example values, then add them.
a, b = 4, 5
a + b  # evaluates to 9

9

# Take a square root with the math module and print it as a whole number.
radikand = 9
zahl = math.sqrt(radikand)  # math.sqrt returns a float (3.0 here)
print(f"Wurzel aus 9 ist {int(zahl)}")

Wurzel aus 9 ist 3

#pip install pandas
import pandas as pd
import scipy.stats as stats

# Excel-Datei einlesen mit pandas
#pearson = pd.read_excel("D:/tests/pearson.xlsx", sheet_name="Tabelle1")

#import pandas as pd
#import numpy as np
#from scipy.stats import describe

# Simulate 20 yes/no answers each for a "before" and an "after" column.
# The fixed seed makes the draws reproducible; the columns are drawn in the
# order listed below ("vorher" first, then "nachher").
np.random.seed(0)
antworten = ["ja", "nein"]
chi = pd.DataFrame(
    {spalte: np.random.choice(antworten, size=20) for spalte in ("vorher", "nachher")}
)

print(chi.head(5))

  vorher nachher
0     ja      ja
1   nein    nein
2   nein    nein
3     ja      ja
4   nein      ja

# Encode both columns as categoricals (the counterpart of as.factor in R).
for spalte in ("vorher", "nachher"):
    chi[spalte] = chi[spalte].astype("category")
print(chi.dtypes)

vorher     category
nachher    category
dtype: object

# ▶︎ Datenquelle: Simulierte Werte für Demonstrationszwecke
# score_1 ~ N(100, 15), score_2 ~ N(105, 10)
# Simulated demo data with a fixed seed so the values are reproducible:
# score_1 is drawn from N(100, 15), score_2 from N(105, 10), 50 values each.
np.random.seed(0)
pearson = pd.DataFrame({
    "score_1": np.random.normal(loc=100, scale=15, size=50),
    "score_2": np.random.normal(loc=105, scale=10, size=50),
})

# Compute descriptive statistics for every column.
desc_stats = {}

for col in pearson.columns:
    # describe is reached through the scipy.stats module alias, and its result
    # gets its own name so that the alias `stats` is not overwritten.
    beschreibung = stats.describe(pearson[col])
    desc_stats[col] = {
        "Anzahl": beschreibung.nobs,
        "Mittelwert": round(beschreibung.mean, 2),
        # describe reports the variance; the standard deviation is its root.
        "Standardabweichung": round(np.sqrt(beschreibung.variance), 2),
        "Minimum": round(beschreibung.minmax[0], 2),
        "Maximum": round(beschreibung.minmax[1], 2),
        "Schiefe": round(beschreibung.skewness, 2),
        "Kurtosis": round(beschreibung.kurtosis, 2),
    }

# Turn the nested dict into a DataFrame; transposing puts one input column
# per row and one statistic per column.
desc_df = pd.DataFrame(desc_stats).T

# Show the table.
print("\n▶︎ Deskriptive Statistik für 'pearson':\n")
print(desc_df)

# Histogram of score_1 with 15 bins, drawn through the Axes object that
# pyplot currently targets.
achse = plt.gca()
achse.hist(pearson["score_1"], bins=15, color='deepskyblue', edgecolor='black')
achse.set_title("Histogramm des IQs")
achse.set_xlabel("IQ")
achse.set_ylabel("Anzahl")
plt.show()

# Three plain Python lists, one per kind of content.
var1 = [15, 85.15, 9999999]           # numeric: integers and a float
var2 = [True, False]                  # logical / boolean
var3 = ["Maier", "Klein", 18, 18.14]  # mixed: a list may hold different types

print("var1:", var1)
print("var2:", var2)
print("var3:", var3)

var1: [15, 85.15, 9999999]
var2: [True, False]
var3: ['Maier', 'Klein', 18, 18.14]

# Print the type of each of the three variables.
for wert in (var1, var2, var3):
    print(type(wert))

<class 'list'>
<class 'list'>
<class 'list'>

# Build a 4-D array of shape (3, 3, 4, 2) filled with the repeating cycle 1..9.
# The shape needs 3 * 3 * 4 * 2 = 72 values, i.e. the nine numbers 8 times over.
arr = np.tile(np.arange(1, 10), 8).reshape(3, 3, 4, 2)

print(arr)

[[[[1 2]
   [3 4]
   [5 6]
   [7 8]]

  [[9 1]
   [2 3]
   [4 5]
   [6 7]]

  [[8 9]
   [1 2]
   [3 4]
   [5 6]]]


 [[[7 8]
   [9 1]
   [2 3]
   [4 5]]

  [[6 7]
   [8 9]
   [1 2]
   [3 4]]

  [[5 6]
   [7 8]
   [9 1]
   [2 3]]]


 [[[4 5]
   [6 7]
   [8 9]
   [1 2]]

  [[3 4]
   [5 6]
   [7 8]
   [9 1]]

  [[2 3]
   [4 5]
   [6 7]
   [8 9]]]]

# A list whose elements are the three lists defined earlier.
mylist = [
    var1,  # numeric values
    var2,  # booleans
    var3,  # mixed types
]
print(mylist)

[[15, 85.15, 9999999], [True, False], ['Maier', 'Klein', 18, 18.14]]

import pandas as pd

# Three equally long lists that become the columns of a small table.
var4 = ["Haus2", "Haus3", "Haus4", "Haus1"]
var5 = [5, 6, 7, 8]
var6 = [9, 10, 11, 12]

# Map each column name to its values, then build the DataFrame from the mapping.
spalten = {"var4": var4, "var5": var5, "var6": var6}
Tab1 = pd.DataFrame(spalten)

print(Tab1)

    var4  var5  var6
0  Haus2     5     9
1  Haus3     6    10
2  Haus4     7    11
3  Haus1     8    12

def alters_kommentar(alter):
    """Return the comment that belongs to the given age.

    The three ranges cover every age without gaps: below 20, from 20 up to
    and including 49, and 50 or older.

    Args:
        alter: Age in years.

    Returns:
        The comment string for the range that contains ``alter``.
    """
    if alter < 20:
        return "Du Teenager!!!"
    # Reaching this point already means alter >= 20, so only the upper bound
    # is checked; this keeps ages 20 and 21 out of the retirement branch.
    if alter < 50:
        return "Du wirst auch nicht mehr jünger."
    return "Ich zähle die Tage bis zur Rente."


# How old are you?
alter = 51

print(alters_kommentar(alter))

Ich zähle die Tage bis zur Rente.

# Print the odd numbers 1, 3, 5, 7, 9; the loop stops once the counter passes 9.
revar1 = 1

while revar1 <= 9:
    print(revar1)
    revar1 += 2

anzahl = 5

# Count down from 10 to anzahl (inclusive).
# The range is built once when the loop starts, so changing anzahl inside the
# loop body would not change the number of iterations.
for i in reversed(range(anzahl, 11)):
    print(i)

10
9
8
7
6
5

# Print z in steps of two (0, 2, 4, 6); the loop ends once z exceeds 6.
z = 0
while z <= 6:
    print(f"z = {z}")
    z += 2

z = 0
z = 2
z = 4
z = 6

# Load the Titanic table; `data` is imported from pydataset at the top of the file.
titanic = data("titanic")
print(titanic.head(5))  # first five rows

       class     age  sex survived
1  1st class  adults  man      yes
2  1st class  adults  man      yes
3  1st class  adults  man      yes
4  1st class  adults  man      yes
5  1st class  adults  man      yes

# Preview: the first 10 rows only.
titanic.iloc[:10]

# Flag, row by row, whether the value in column 'age' is missing.
titanic["age"].isnull()

1       False
2       False
3       False
4       False
5       False
        ...  
1312    False
1313    False
1314    False
1315    False
1316    False
Name: age, Length: 1316, dtype: bool

# Count the missing values in column 'age': build the boolean mask first, then
# sum it (each True counts as 1).
fehlt_age = titanic['age'].isna()
fehlt_age.sum()

np.int64(0)

# Drop every row that contains at least one missing value.
titanic = titanic.dropna(axis=0, how="any")

# Alternative: drop only the rows in which 'age' is missing.
titanic = titanic.dropna(subset=["age"])

# Before/after comparison of the row count, against a freshly loaded copy.
titanic_original = data("titanic")
print("Vorher:", titanic_original.shape[0])
print("Nachher:", titanic.shape[0])

Vorher: 1316
Nachher: 1316

import matplotlib.pyplot as plt

# 'class' holds text labels such as "1st class", so the labels are counted and
# drawn as one bar per label; binning them with plt.hist(bins=20) would place
# narrow bars at bin edges that do not line up with the labels.
klassen_anzahl = titanic['class'].dropna().value_counts().sort_index()
plt.bar(klassen_anzahl.index, klassen_anzahl.values, color='skyblue', edgecolor='black')
plt.title("Class")
plt.xlabel("Class")
plt.ylabel("Anzahl")
plt.show()

# Reload the dataset once so the new column is added to an unmodified table.
titanic = data('titanic')

# New variable: True where the passenger's 'age' entry is "adults".
titanic['is_adults'] = titanic['age'] == "adults"
print(titanic)

          class     age    sex survived
1     1st class  adults    man      yes
2     1st class  adults    man      yes
3     1st class  adults    man      yes
4     1st class  adults    man      yes
5     1st class  adults    man      yes
...         ...     ...    ...      ...
1312  3rd class   child  women       no
1313  3rd class   child  women       no
1314  3rd class   child  women       no
1315  3rd class   child  women       no
1316  3rd class   child  women       no

[1316 rows x 4 columns]
          class     age    sex survived  is_adults
1     1st class  adults    man      yes       True
2     1st class  adults    man      yes       True
3     1st class  adults    man      yes       True
4     1st class  adults    man      yes       True
5     1st class  adults    man      yes       True
...         ...     ...    ...      ...        ...
1312  3rd class   child  women       no      False
1313  3rd class   child  women       no      False
1314  3rd class   child  women       no      False
1315  3rd class   child  women       no      False
1316  3rd class   child  women       no      False

[1316 rows x 5 columns]

# Remove the helper column again. drop returns a new DataFrame here, so
# `titanic` itself keeps the column.
titanicdrop = titanic.drop('is_adults', axis=1)
print(titanicdrop.head(5))

       class     age  sex survived
1  1st class  adults  man      yes
2  1st class  adults  man      yes
3  1st class  adults  man      yes
4  1st class  adults  man      yes
5  1st class  adults  man      yes

print(titanic.shape[0])  # number of rows before
# Drop a row by its index label; only the row labelled 1 is removed.
titanic = titanic.drop(index=1)
print(titanic.shape[0])  # number of rows after

1316
1315

# Rename a column (here 'survived' -> 'überlebt').
titanic = data('titanic')
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() adds a stray "None" line to the output.
titanic.info()
titanic = titanic.rename(columns={"survived": "überlebt"})
titanic.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1316 entries, 1 to 1316
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   class     1316 non-null   object
 1   age       1316 non-null   object
 2   sex       1316 non-null   object
 3   survived  1316 non-null   object
dtypes: object(4)
memory usage: 51.4+ KB
None
<class 'pandas.core.frame.DataFrame'>
Index: 1316 entries, 1 to 1316
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   class     1316 non-null   object
 1   age       1316 non-null   object
 2   sex       1316 non-null   object
 3   überlebt  1316 non-null   object
dtypes: object(4)
memory usage: 51.4+ KB
None

# Recode the values of 'überlebt' through a lookup table ('yes' -> 'Ja',
# 'no' -> 'Nein').
umkodierung = {"yes": "Ja", "no": "Nein"}
titanic["überlebt"] = titanic["überlebt"].map(umkodierung)
print(titanic.head(5))

       class     age  sex überlebt
1  1st class  adults  man      yes
2  1st class  adults  man      yes
3  1st class  adults  man      yes
4  1st class  adults  man      yes
5  1st class  adults  man      yes
       class     age  sex überlebt
1  1st class  adults  man       Ja
2  1st class  adults  man       Ja
3  1st class  adults  man       Ja
4  1st class  adults  man       Ja
5  1st class  adults  man       Ja

import seaborn as sns
import pandas as pd

# Load the Titanic data.
titanic = data("titanic")
titanic.head()

# Data selection: extract specific variables by column name.
neuedaten1 = titanic.loc[:, ["age", "class"]]
print("\n1. Auswahl bestimmter Variablen:")
print(neuedaten1.head())

1. Auswahl bestimmter Variablen:
      age      class
1  adults  1st class
2  adults  1st class
3  adults  1st class
4  adults  1st class
5  adults  1st class

# Select the second column (position 1): look up its name by position, then
# select the column by that name.
zweite_spalte = titanic.columns[1]
neuedaten2 = titanic[zweite_spalte]
print("Auswahl der zweiten Spalte:")
print(neuedaten2.head())

Auswahl der zweiten Spalte:
1    adults
2    adults
3    adults
4    adults
5    adults
Name: age, dtype: object

# Filter: adults only. The boolean mask is built first, then used to select rows.
print( len(titanic))
ist_erwachsen = titanic["age"] == "adults"
neuedaten3 = titanic.loc[ist_erwachsen]

print("\nNur Erwachsene (age == 'adults'):")
print( len(neuedaten3))

1316

Nur Erwachsene (age == 'adults'):
1207

# Filter: adults OR female passengers.
# The 'sex' column stores the labels "man" and "women", so female passengers
# are matched with "women"; a comparison with "female" matches no row.
print( len(titanic))
neuedaten4 = titanic[(titanic["age"] == "adults") | (titanic["sex"] == "women")]

print("\nNur Erwachsene (age == 'adults') oder weiblich:")
print( len(neuedaten4))

1316

Nur Erwachsene (age == 'adults') oder weiblich:
1207

# Filter: adults AND female passengers.
# The 'sex' column stores the labels "man" and "women", so female passengers
# are matched with "women"; a comparison with "female" yields an empty result.
print( len(titanic))
neuedaten4 = titanic[(titanic["age"] == "adults") & (titanic["sex"] == "women")]

print("\nNur Erwachsene (age == 'adults') und weiblich:")
print( len(neuedaten4))

1316

Nur Erwachsene (age == 'adults') und weiblich:
0

No-Go-Liste¶

Ich bin eine Überschrift¶

Ich bin eine Überschrift¶

Ich bin eine Überschrift¶

Ich bin eine Überschrift¶

Ausgabe¶

optional¶

Deskriptive Statistik¶

Datentypen¶

Vector¶

Matrix¶

Meine erste Liste¶

Mein erster erzeugter Datensatz¶

Meine erste if-Bedingung¶

Schleifen¶

Finden und Löschen von Missing-Values (NA)¶

Welchen Datentyp hat meine Variable?¶

Erzeuge neue Variable¶

Eine Spalte löschen¶

Zeilen löschen¶

Umbenennen¶

Spalte umbenennen¶

Datenauswahl¶

	class	age	sex	survived
1	1st class	adults	man	yes
2	1st class	adults	man	yes
3	1st class	adults	man	yes
4	1st class	adults	man	yes
5	1st class	adults	man	yes
6	1st class	adults	man	yes
7	1st class	adults	man	yes
8	1st class	adults	man	yes
9	1st class	adults	man	yes
10	1st class	adults	man	yes