Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
E
exam-statistics
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Simon Pintarelli
exam-statistics
Commits
6332a49a
Commit
6332a49a
authored
Jan 28, 2017
by
Simon Pintarelli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update statistics.Rnw, add some wrapper scripts
parent
24bca575
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
218 additions
and
21 deletions
+218
-21
init-spreadsheet-from-edoz.py
init-spreadsheet-from-edoz.py
+103
-0
pandas-script.py
pandas-script.py
+18
-0
statistics.Rnw
statistics.Rnw
+74
-21
xls2csv.py
xls2csv.py
+23
-0
No files found.
init-spreadsheet-from-edoz.py
0 → 100755
View file @
6332a49a
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Extract the columns 'Legi-Number', 'Last Name', 'First Name' from the file
Grades*txt (the one exported from edoz). Store result as xls.
"""
# to_excel needs python-xlwt
import
pandas
as
pa
import
argparse
import
os
# Columns copied from the edoz export into the marks spreadsheet.
selected_columns = ['Number', 'Last Name', 'First Name']

# Column headers of the German edoz export, in file order.
header_de = [
    'Id',
    'Familienname',
    'Vorname',
    'Nummer',
    'Sekretariat',
    'Fachrichtung',
    'Rep.',
    'Note',
    '+ / - (Notentendenz)',
    '* (Abbruch/nicht erschienen)',
    'Prüfsumme',
]

# Column headers of the English edoz export; entry i corresponds to
# header_de[i].
header_en = [
    'Id',
    'Last Name',
    'First Name',
    'Number',
    'Administration Office',
    'Direction',
    'Rep.',
    'Grade',
    '+ / - (Grade Trend)',
    '* (dropout/no show)',
    'Checksum',
]

# German -> English header mapping, built from the two position-aligned lists.
translation = dict(zip(header_de, header_en))
def extract_columns(args):
    """Write the student columns of an edoz export to ``marks.xls``.

    The tab-separated, ISO-8859-1 encoded file ``args.src`` is read, German
    column headers are translated to English, and the columns listed in
    ``selected_columns`` plus four empty columns ``A1`` .. ``A4`` are stored
    in ``marks.xls`` (and in ``marks.csv`` when ``args.to_csv`` is set).

    Args:
        args: parsed command line providing ``src`` (path of the edoz
            export), ``force`` (overwrite existing output files) and
            ``to_csv`` (additionally write ``marks.csv``).

    Raises:
        Exception: if ``args.src`` cannot be read, if neither the German nor
            the English headers are present, or if an output file already
            exists and ``args.force`` is not set.
        KeyError: if one of the expected header columns is missing.
    """
    try:
        edoz_data = pa.read_csv(args.src, sep='\t', encoding='ISO-8859-1')
    except Exception as err:
        # Keep the original cause attached instead of discarding it.
        raise Exception(
            'Could not read from csv-file ' + str(args.src) +
            '\nCheck that file exists and is ISO-8859-1 encoded') from err

    # There is a tab past the last column, so pandas reads an additional
    # empty column (filled with nan). Selecting the known headers drops it.
    if 'Familienname' in edoz_data.columns:
        # German export: keep the known columns and translate them to EN.
        edoz_data = edoz_data[header_de].rename(columns=translation)
    elif 'Last Name' in edoz_data.columns:
        edoz_data = edoz_data[header_en]
    else:
        raise Exception(
            'Something with the column-headers is wrong (expected to find '
            'Last Name). Check your input file %s.' % args.src)

    # Work on a copy so that adding columns does not write into a slice of
    # edoz_data.
    grade_columns = edoz_data[selected_columns].copy()
    for i in range(1, 5):
        grade_columns['A%d' % i] = ''

    # Check every requested output file before writing anything, so a
    # refused run does not leave a freshly written marks.xls behind.
    targets = ['marks.xls']
    if args.to_csv:
        targets.append('marks.csv')
    if not args.force:
        for target in targets:
            if os.path.exists(target):
                raise Exception(
                    './%s exists. Delete or rename it and try again.' % target)

    # NOTE(review): writing .xls depends on the xlwt engine and on to_excel
    # accepting `encoding`; confirm both against the installed pandas.
    grade_columns.to_excel('marks.xls', encoding='ISO-8859-1', index=False)
    if args.to_csv:
        grade_columns.to_csv(
            'marks.csv', sep=',', encoding='ISO-8859-1', index=False)
if __name__ == '__main__':
    # Command line: one positional input file plus two boolean switches.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('src', help='exported edoz file (Grades|Noten)*txt')
    switches = (
        (('-f', '--force'), 'overwrite *csv,xls without asking'),
        (('--to-csv',), 'Also save as csv (ISO-8859-1)'),
    )
    for flags, text in switches:
        parser.add_argument(*flags, help=text, action='store_true')
    args = parser.parse_args()
    extract_columns(args)
pandas-script.py
0 → 100644
View file @
6332a49a
# coding: utf-8
import
pandas
as
pa
# Read the tab-separated edoz template and the comma-separated grade list.
data = pa.read_csv('grades-template.txt', sep='\t', encoding='ISO-8859-1')
grades = pa.read_csv('grades-out.csv', sep=',', encoding='ISO-8859-1')

# Attach the computed grade to every template row via the student number.
tmp = grades[['Number', 'Grade']]
out = data.merge(tmp, on='Number')

# Keep the edoz column layout; the grade coming from grades-out.csv is the
# merged 'Grade_y' column, which takes over the plain 'Grade' header.
out = out[[
    'Id',
    'Last Name',
    'First Name',
    'Number',
    'Administration Office',
    'Direction',
    'Rep.',
    'Grade_y',
    '+ / - (Grade Trend)',
    '* (dropout/no show)',
    'Checksum',
]].rename(columns={'Grade_y': 'Grade'})

out.to_csv('edoz-final.txt', sep='\t', encoding='ISO-8859-1', index=False)

# Rows whose grade is exactly 3.75 (presumably kept for manual inspection).
knapp = out[out['Grade'] == 3.75]
statistics.Rnw
View file @
6332a49a
% Author: Simon Pintarelli <simon.pintarelli@sam.math.ethz.ch>
% -------------------------------------------------------------
% Input: marks.csv
% (with columns: 'First Name', 'Last Name', 'Number', 'A1', ...'An')
% -------------------------------------------------------------
% Output:
% - tex-file to generate pdf
% - 'grades-out.csv'
% -------------------------------------------------------------
% Emacs hint: use M-x ess-noweb-set-code-mode, then type R-mode
% Author: Simon Pintarelli <simon.pintarelli@sam.math.ethz.ch>
% -------------------------------------------------------------
\documentclass[a4paper]{article}
\usepackage{subfig}
...
...
@@ -44,7 +53,7 @@ int_grades <- c(1, 4, 6, 6)
# from http://stackoverflow.com/questions/11030898/knitr-how-to-align-code-and-plot-side-by-side
partWidth <- 45
fullWidth <- 80
options(width
=
fullWidth)
options(width
=
fullWidth)
## option() settings, just for the current chunk
knit_hooks$set(r.opts=local({
...
...
@@ -65,9 +74,7 @@ knit_hooks$set(r.opts=local({
## case, wrap the usual textual output in LaTeX code placing it in a
## narrower adjustbox environment and setting the graphics that it
## produced in another box beside it.
defaultChunkHook <- environment(knit_hooks[["get"]])$defaults$chunk
codefigChunkHook <- function (x, options) {
main <- defaultChunkHook(x, options)
before <-
...
...
@@ -113,46 +120,77 @@ library(xtable)
@
<<prepare data, echo=FALSE>>=
dset <- read.csv('
data
.csv', encoding='ISO-8859-1')
dset <- dset[, !(names(dset) %in% c("
Familienname", "Vorn
ame"))]
dset <- melt(dset, id=c("Num
m
er"))
dset <- read.csv('
marks
.csv', encoding='ISO-8859-1')
dset <- dset[, !(names(dset) %in% c("
Last.Name", "First.N
ame"))]
dset <- melt(dset, id=c("Num
b
er"))
dset$value <- as.numeric(dset$value)
total_points <- group_by(dset, Num
m
er) %>% summarize(s=sum(value))
total_points <- group_by(dset, Num
b
er) %>% summarize(s=sum(value))
linscale <- data.frame(approx(int_points, int_grades, seq(0, maxpoints, by=dp)))
scale <- data.frame(grade=floor(linscale$y*1/dg)*dg, s=linscale$x)
grades <- data.frame(left_join(x=total_points, y=scale, by="s"))
colnames(grades) <- c("legi", "points", "grade")
grades$legi <- as.factor(grades$legi)
# plot grading scale (linear interpolation and projection to existing marks)
#
# plot grading scale (linear interpolation and projection to existing marks)
scale_plot <- cbind(linscale, col=rep("linear interpolation", nrow(linscale)))
colnames(scale_plot) <- c("s", "grade", "col")
scale_plot <- rbind(scale_plot, cbind(scale, col=rep("rounded", nrow(linscale))))
@
<<store grades, echo=FALSE>>=
## export (legi, grade) to csv
write.csv(data.frame(Number=grades$legi, Grade=grades$grade),
fileEncoding='ISO-8859-1',
file="grades-out.csv",
row.names=FALSE,
quote=FALSE, na="")
@
\section*{Grades}
\label{sec:grades}
<<grades, echo=FALSE, warning=TRUE, fig.width=5, fig.height=5, out.width="0.49\\linewidth", fig.show='hold', fig.env="figure*", fig.subcap=c("Histogram", "CDF"), fig.pos='h!'>>=
ggplot(grades, aes(x=grade)) + geom_histogram(binwidth = 0.25, alpha=0.8) + scale_x_continuous(breaks=seq(1,6, by=1)) + ylab("Num. Stud")
ggplot(grades, aes(x=grade)) + stat_ecdf() + scale_x_continuous(breaks=seq(1,6, by=1)) + ylab("Percentage")
ggplot(grades, aes(x=grade)) +
geom_histogram(binwidth = 0.25, alpha=0.8) +
scale_x_continuous(breaks=seq(1,6, by=1)) +
ylab("Num. Stud")
ggplot(grades, aes(x=grade)) +
stat_ecdf() +
scale_x_continuous(breaks=seq(1,6, by=1)) +
ylab("Percentage")
@ %def
<<violing, echo=FALSE, fig.height=1, fig.width=3, fig.show='hold'>>=
ogrades <- cbind.data.frame(x=rep("grade", nrow(grades)), y=grades$grade)
opoints <- cbind.data.frame(x=rep("points", nrow(grades)), y=grades$points)
ggplot(ogrades, aes(x=x,y=y)) + geom_violin() + geom_boxplot(width=.1, fill="black", outlier.colour=NA) +
stat_summary(fun.y=median, geom="point", fill="white", shape=21, size=2.5) + coord_flip() + ylab("") + xlab("")
ggplot(opoints, aes(x=x,y=y)) + geom_violin() + geom_boxplot(width=.1, fill="black", outlier.colour=NA) +
stat_summary(fun.y=median, geom="point", fill="white", shape=21, size=2.5) + coord_flip() + ylab("") + xlab("")
ggplot(ogrades, aes(x=x,y=y)) +
geom_violin() +
geom_boxplot(width=.1, fill="black", outlier.colour=NA) +
stat_summary(fun.y=median, geom="point", fill="white", shape=21, size=2.5) +
coord_flip() +
xlab("") +
ylab("")
ggplot(opoints, aes(x=x,y=y)) +
geom_violin() +
geom_boxplot(width=.1, fill="black", outlier.colour=NA) +
stat_summary(fun.y=median, geom="point", fill="white", shape=21, size=2.5) +
coord_flip() +
xlab("") +
ylab("")
@
<<grading_scale_plot, opts.label="codefig", echo=FALSE, results='asis', cache=FALSE, fig.width=5, fig.height=5>>=
I <- match(seq(1, 6, by=dg), scale$grade)
plot1 <- ggplot(scale_plot, aes(x=s, y=grade, col=col)) + geom_line() + scale_y_continuous(breaks=seq(1,6,by=0.5)) +
scale_x_continuous(breaks=seq(0, maxpoints, by=ceiling(maxpoints/10))) + xlab("points") + theme(legend.position="bottom")
plot1 <- ggplot(scale_plot, aes(x=s, y=grade, col=col)) +
geom_line() +
scale_y_continuous(breaks=seq(1,6,by=0.5)) +
scale_x_continuous(breaks=seq(0, maxpoints, by=ceiling(maxpoints/10))) +
xlab("points") +
theme(legend.position="bottom")
plot1 + theme(legend.title=element_blank())
print(xtable(data.frame(Grade=scale$grade[I], Points=scale$s[I])), include.rownames=FALSE, floating=FALSE)
@
...
...
@@ -172,14 +210,29 @@ ppassed <- round(100*passed/ncand, 1)
<<out,echo=FALSE,warning=FALSE, fig.width=5, fig.height=5, out.width="0.49\\linewidth",fig.cap="Point distribution (per task)",fig.show='hold',fig.subcap=c("Histogram","CDF"), fig.env="figure*", fig.pos='h!'>>=
ggplot(dset, aes(x=value)) + geom_histogram(binwidth=1, alpha=0.8) + facet_wrap(~variable, scales="free") + ylab("Num. Stud.") + xlab("Points")
ggplot(dset, aes(x=value)) + stat_ecdf() + facet_wrap(~variable, scales="free") + xlab("Points") + ylab("Percentage")
ggplot(dset, aes(x=value)) +
geom_histogram(binwidth=1, alpha=0.8) +
facet_wrap(~variable, scales="free") +
ylab("Num. Stud.") +
xlab("Points")
ggplot(dset, aes(x=value)) +
stat_ecdf() +
facet_wrap(~variable, scales="free") +
xlab("Points") +
ylab("Percentage")
@ %def
<<total, echo=FALSE, warning=FALSE, fig.width=5, fig.height=5, out.width="0.49\\linewidth", fig.cap="Point distribution (total)", fig.show='hold', fig.subcap=c("Histogram", "CDF"), fig.env='figure*', fig.pos='h!'>>=
ggplot(total_points, aes(x=s)) + geom_histogram(binwidth=2, alpha=0.8) + ylab("Num. stud.") + xlab("Total points") + xlim(0, maxpoints)
ggplot(total_points, aes(x=s)) + stat_ecdf() + xlim(0, maxpoints) + xlab("Total points")
ggplot(total_points, aes(x=s)) +
geom_histogram(binwidth=2, alpha=0.8) +
ylab("Num. stud.") +
xlab("Total points") +
xlim(0, maxpoints)
ggplot(total_points, aes(x=s)) +
stat_ecdf() +
xlim(0, maxpoints) +
xlab("Total points")
@ %def
<<density, echo=FALSE, warning=FALSE, out.width="0.49\\linewidth", fig.cap="Total", fig.show='hold', fig.pos='h!', fig.height=3>>=
...
...
xls2csv.py
0 → 100755
View file @
6332a49a
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Convert xls to csv in ISO-8859-1 encoding. Unfortunately python pandas does not
support odt.
"""
import
pandas
as
pa
import
sys
import
argparse
import
os
if __name__ == '__main__':
    # Two positional arguments: the spreadsheet to read and the csv to write.
    parser = argparse.ArgumentParser(description=__doc__)
    for name, text in (('src', 'path/to/xls'), ('dst', 'output')):
        parser.add_argument(name, help=text)
    args = parser.parse_args()

    if os.path.exists(args.src):
        data = pa.read_excel(args.src)
        data.to_csv(args.dst, index=False, encoding='ISO-8859-1', sep=',')
    else:
        raise Exception('Could not read from file ' + str(args.src))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment