import pandas as pd                         # zum Einlesen und Verarbeiten von Daten
import matplotlib.pyplot as plt             # (falls du grafisch darstellen möchtest)
from scipy.stats import chi2_contingency, fisher_exact    # Chi-Quadrat-Test
import numpy as np                          # für mathematische Operationen
import plotly.graph_objects as go            # Sankey

# Load the survey data from the Excel workbook and preview the first five rows.
chi = pd.read_excel(io="chi.xlsx")
chi.head(n=5)

# Build the contingency table of "vorher" (rows) against "nachher" (columns);
# this mirrors R's `counts <- table(vorher, nachher)`.
counts = pd.crosstab(chi["vorher"], chi["nachher"])

# Bar colours, applied to the plotted "nachher" columns.
colors = ["deepskyblue", "tomato"]

# Grouped (non-stacked) horizontal bar chart of the contingency table.
counts.plot(
    kind="barh",
    color=colors,
    figsize=(8, 5),
    width=0.7,
    legend=True,
)

# Title and axis labels. With kind="barh" the "vorher" categories lie on the
# y-axis and the bar lengths (counts) run along the x-axis, so the count label
# belongs on the x-axis and the category label on the y-axis.
plt.title("Balkendiagramm zur Studierbereitschaft\n(Nachher)", fontsize=14)
plt.xlabel("Anzahl Personen", fontsize=12)
plt.ylabel("Studierbereitschaft vorher", fontsize=12)

# Legend
plt.legend(title="nachher", loc="upper right")
plt.tight_layout()
plt.show()

# Sankey diagram of the flows from the "vorher" nodes to the "nachher" nodes.

# 1. Nodes: a label's position in this list is the index the links refer to.
labels = ["hoch_v", "niedrig_v", "hoch_n", "niedrig_n"]

# Links as (source index, target index, value) triples.
flows = [
    (0, 2, 6),   # hoch_v    -> hoch_n
    (0, 3, 9),   # hoch_v    -> niedrig_n
    (1, 2, 25),  # niedrig_v -> hoch_n
    (1, 3, 5),   # niedrig_v -> niedrig_n
]
sources, targets, values = (list(column) for column in zip(*flows))

# 2. Assemble the figure from separately named node and link settings.
node_settings = {
    "pad": 15,
    "thickness": 30,
    "line": {"color": "black", "width": 0.5},
    "label": labels,
    "color": "lightblue",
}
link_settings = {
    "source": sources,
    "target": targets,
    "value": values,
}
fig = go.Figure(data=[go.Sankey(node=node_settings, link=link_settings)])

fig.update_layout(title_text="Sankey-Diagramm: Studierbereitschaft", font_size=13)
fig.show()

# Contingency table of the observed counts ("vorher" rows, "nachher" columns),
# printed below a heading followed by a blank line.
counts = pd.crosstab(index=chi["vorher"], columns=chi["nachher"])
print("Beobachtete Werte:\n", counts, sep="\n")

Beobachtete Werte:

nachher  hoch   niedrig
vorher                 
hoch         6        9
niedrig     25        5

# Chi-square test without Yates' continuity correction.
chi2, p, dof, expected = chi2_contingency(counts, correction=False)

# Expected frequencies rounded to two decimals, labelled with the same row and
# column labels as the observed table.
expected_table = pd.DataFrame(
    np.round(expected, 2),
    index=counts.index,
    columns=counts.columns,
)
print("\nErwartete Werte (gerundet):\n")
print(expected_table)

Erwartete Werte (gerundet):

nachher  hoch   niedrig
vorher                 
hoch     10.33     4.67
niedrig  20.67     9.33

# Chi-square by hand: sum of (observed - expected)^2 / expected over the four
# cells. The expected values are the two-decimal roundings, so the result can
# differ slightly from the statistic scipy computes from unrounded values.
cell_pairs = [(6, 10.33), (25, 20.67), (9, 4.67), (5, 9.33)]  # (observed, expected)
round(sum((obs - exp) ** 2 / exp for obs, exp in cell_pairs), 1)

8.7

# correction=False: no adjustment, i.e. the "normal" chi-square formula.
# The full result object is kept in `ch` and shown as the cell output.
ch = chi2_contingency(observed=counts, correction=False)
ch

Chi2ContingencyResult(statistic=np.float64(8.761520737327189), pvalue=np.float64(0.0030765241259335468), dof=1, expected_freq=array([[10.33333333,  4.66666667],
       [20.66666667,  9.33333333]]))

# Yates-corrected chi-square by hand: each absolute deviation |observed - expected|
# is reduced by 0.5 before squaring. Uses the two-decimal expected values.
cell_pairs = [(6, 10.33), (25, 20.67), (9, 4.67), (5, 9.33)]  # (observed, expected)
round(sum((abs(obs - exp) - 0.5) ** 2 / exp for obs, exp in cell_pairs), 2)

6.84

# correction=True: Yates' adjustment, which extends the formula by the -0.5 term.
# The full result object is kept in `ch` and shown as the cell output.
ch = chi2_contingency(observed=counts, correction=True)
ch

Chi2ContingencyResult(statistic=np.float64(6.8562788018433185), pvalue=np.float64(0.008833031444231193), dof=1, expected_freq=array([[10.33333333,  4.66666667],
       [20.66666667,  9.33333333]]))

# Chi-square test without continuity correction; print the full result object.
ch = chi2_contingency(observed=counts, correction=False)
print(ch)

# Fisher's exact test on the same table (original note: only for 2x2 tables).
oddsratio, p_fisher = fisher_exact(counts)
print(
    "\nFisher-Test:",
    f"Odds Ratio: {oddsratio:.4f}",
    f"p-Wert: {p_fisher:.4f}",
    sep="\n",
)

Chi2ContingencyResult(statistic=np.float64(8.761520737327189), pvalue=np.float64(0.0030765241259335468), dof=1, expected_freq=array([[10.33333333,  4.66666667],
       [20.66666667,  9.33333333]]))

Fisher-Test:
Odds Ratio: 0.1333
p-Wert: 0.0057

# Chi-square test with Yates' continuity correction; print the full result object.
ch = chi2_contingency(observed=counts, correction=True)
print(ch)

# Fisher's exact test on the same table (original note: only for 2x2 tables).
oddsratio, p_fisher = fisher_exact(counts)
print(
    "\nFisher-Test:",
    f"Odds Ratio: {oddsratio:.4f}",
    f"p-Wert: {p_fisher:.4f}",
    sep="\n",
)

Chi2ContingencyResult(statistic=np.float64(6.8562788018433185), pvalue=np.float64(0.008833031444231193), dof=1, expected_freq=array([[10.33333333,  4.66666667],
       [20.66666667,  9.33333333]]))

Fisher-Test:
Odds Ratio: 0.1333
p-Wert: 0.0057

import researchpy as rp


# Cross-tabulation plus test statistics (comparable to CrossTable in R),
# requested with prop="col" and the chi-square test.
table, results = rp.crosstab(
    chi["vorher"],
    chi["nachher"],
    prop="col",
    test="chi-square",
)

print("Kreuztabelle:", table, sep="\n")
print("\nChi²-Test-Ergebnisse:", results, sep="\n")

Kreuztabelle:
        nachher                
nachher   hoch  niedrig     All
vorher                         
hoch      19.35   64.29   33.33
niedrig   80.65   35.71   66.67
All      100.00  100.00  100.00

Chi²-Test-Ergebnisse:
                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    8.7615
1                    p-value =    0.0031
2               Cramer's phi =    0.4412

/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/researchpy/crosstab.py:146: FutureWarning:

Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'Pearson Chi-square ( 1.0) = ' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.

# Number of rows in the data set (first entry of the frame's shape).
anzahl = chi.shape[0]
print(f"Der Datensatz hat {anzahl} Zeilen.")

Der Datensatz hat 45 Zeilen.

# Chi-square test without continuity correction. The uncorrected Pearson
# statistic is needed here because `chi2` is reused for the effect size
# (Cramér's V) afterwards; passing correction=True would feed the
# Yates-adjusted value into that formula and understate the effect.
chi2, p, dof, expected = chi2_contingency(counts, correction=False)
print(f"Das Chi-Quadrat liegt bei {chi2:.2f}.")

Das Chi-Quadrat liegt bei 6.86.

# Cramér's V: sqrt(chi2 / (n * (k - 1))), where n is the row count of the data
# set and k is the smaller of the table's two dimensions.
smaller_dim = min(counts.shape)
cramersV = np.sqrt(chi2 / (anzahl * (smaller_dim - 1)))
print(f"Die Effektstärke liegt bei {cramersV:.2f}.")

Die Effektstärke liegt bei 0.39.

Fall	Fisher	Yates
Fall 1	-	-
Fall 2	-	+
Fall 3	+	-
Fall 4	+	+

Hypothese¶

Alternativ¶

Voraussetzungen¶

import - Befehle¶

Datensatz¶

Balkendiagramm¶

Sankey-Diagramm¶

Kreuztabelle und beobachtete Werte¶

Erwartete Werte¶

Ergebnis zum $\chi^2$ und Prüfung der Signifikanz¶

Berechnung der Freiheitsgrade¶

Fall 1¶

Fall 2¶

Fall 3¶

Fall 4¶

Alternative Darstellung mit researchpy¶

Cramers V¶

Aussage¶

Alternative¶