(ns test_cricket_ranking.data_processor | |
(:require [clojure.contrib.string :as str-utils :only (join)]) | |
(:use [util.charting charts]) | |
(:import [org.jblas DoubleMatrix Eigen]) | |
(:import (java.io BufferedReader FileReader)) | |
(:import (org.jfree.data.time Year TimeSeries TimeSeriesCollection ))) | |
(defn year_str_of_line
  "Returns the second tab-separated field (index 1) of `line`.
  match_year reads the match year out of this field."
  [line]
  (-> line
      (.split "\t")
      (nth 1)))
(defn match_year
  "Parses the year of the match described by `line`.
  Takes the year field (see year_str_of_line), keeps the part before the
  first \"/\", trims surrounding whitespace and converts it to an Integer."
  [line]
  (-> line
      year_str_of_line
      (.split "/")
      (nth 0)
      .trim
      Integer.))
(defn match_between_dates?
  "True when the year parsed from `line` lies in the inclusive range
  [start_date, end_date]. Both bounds are compared numerically against the
  match year, so callers pass years here."
  [line start_date end_date]
  (<= start_date (match_year line) end_date))
;; Team names recognised in the results file. countries_pattern is built
;; from this vector, so only these names are ever extracted from a line.
(def countries
  ["Australia"
   "Bangladesh"
   "England"
   "India"
   "New Zealand"
   "Pakistan"
   "South Africa"
   "Sri Lanka"
   "West Indies"
   "Zimbabwe"])
;; Regex alternation of every known team name ("Australia|Bangladesh|...").
(def countries_pattern
  (re-pattern (apply str (interpose "|" countries))))
(defn get-country-names
  "Returns at most the first two team names found in `line`, in order of
  appearance, using countries_pattern."
  [line]
  (->> line
       (re-seq countries_pattern)
       (take 2)))
(defn teams
  "Looks up each team name found in `line` (see get-country-names) in
  `playing_countries` and returns the resulting values in the same order.
  `playing_countries` is invoked as a function of the name; elsewhere in
  this file it is a map from country name to matrix index."
  [line, playing_countries]
  (map playing_countries (get-country-names line)))
(defn winner
  "Returns the `playing_countries` entry for the trimmed fourth tab field
  (index 3) of `line`. When that field is not a key of `playing_countries`,
  falls back to the first team listed on the line."
  [line, playing_countries]
  (let [result-field (.trim (nth (.split line "\t") 3))
        looked-up    (playing_countries result-field)]
    (if looked-up
      looked-up
      (nth (teams line playing_countries) 0))))
;; Matches text shaped like "A-B (C)" and captures the three digit runs.
(def score-pattern
  #"([0-9]*)-([0-9]*) \(([0-9]*)\)")
(defn scores
  "Parses the first score-pattern match in the fifth tab field (index 4) of
  `line`, shaped like \"A-B (C)\", and returns the vector
  [A/(C+1) B/(C+1)] as doubles. populate_matrix credits the first element
  to the winner and the second to the loser."
  [line]
  (let [score-field (nth (.split line "\t") 4)
        [_ first-num second-num total-num] (nth (re-seq score-pattern score-field) 0)
        denominator (+ (Double. total-num) 1)]
    [(/ (Double. first-num) denominator)
     (/ (Double. second-num) denominator)]))
(defn populate_matrix
  "Accumulates one match line into matrix `d` in place.
  Adds the first value from `scores` to cell (winner, loser) and the second
  to cell (loser, winner), where the indices come from `playing_countries`.
  Returns the result of the final `.put` call."
  [line d playing_countries]
  (let [pair                    (teams line playing_countries)
        [win-share lose-share]  (scores line)
        victor                  (winner line playing_countries)
        first-team              (nth pair 0)
        ;; the loser is whichever of the two teams is not the winner
        defeated                (if (== victor first-team) (nth pair 1) first-team)]
    (.put d victor defeated (+ (.get d victor defeated) win-share))
    (.put d defeated victor (+ (.get d defeated victor) lose-share))))
(defn to_matrix
  "Builds the results matrix for matches between `start_year` and `end_year`
  (inclusive). Starts from an identity matrix sized by the number of
  entries in `playing_countries`, then folds every qualifying line of
  `file_name` into it with populate_matrix. Returns the matrix."
  [file_name start_year end_year playing_countries]
  (let [matrix (DoubleMatrix/eye (count playing_countries))]
    (with-open [reader (BufferedReader. (FileReader. file_name))]
      (doseq [line (line-seq reader)
              :when (match_between_dates? line start_year end_year)]
        (populate_matrix line matrix playing_countries)))
    matrix))
(defn playing_countries
  "Returns a map from country name to a zero-based index for every country
  that appears in a match between `start_year` and `end_year` (inclusive)
  in `result_file`. Names are indexed in sorted order. Returns an empty map
  when no line falls in the range."
  [result_file start_year end_year]
  (let [lines         (.split (slurp result_file) "\n")
        in-range      (filter #(match_between_dates? % start_year end_year) lines)
        ;; mapcat concatenates the per-line name lists directly, replacing
        ;; the original nested (flatten (flatten ...)) over a `for`.
        country-names (sort (set (mapcat get-country-names in-range)))]
    (zipmap country-names (range (count country-names)))))
(defn abs
  "Returns x when it is positive, otherwise the negation of x."
  [x]
  (if-not (pos? x)
    (- x)
    x))
(defn arg-max
  "Returns the zero-based index of the greatest element of `coll`, using
  `compare` for ordering. When the maximum occurs more than once the last
  such index is returned. Returns nil for an empty collection.
  Runs in a single pass instead of building a value->index map and sorting
  every entry."
  [coll]
  (first
    (reduce (fn [[_ best-val :as best] [_ candidate :as entry]]
              ;; >= keeps the later index on ties
              (if (or (nil? best) (>= (compare candidate best-val) 0))
                entry
                best))
            nil
            (map vector (iterate inc 0) coll))))
(defn get_max_eigen_index
  "Returns the index i in [0, len) whose diagonal entry d[i][i] has the
  largest absolute real part (see arg-max for tie handling). Only the real
  part of each entry is considered."
  [d len]
  (arg-max (map (fn [i] (abs (.real (.get d i i))))
                (range len))))
(defn compute_scores_by_teams
  "Computes one score per team index for the given year range.
  Builds the results matrix with to_matrix, runs Eigen/eigenvectors on it,
  treats element 0 of the result as the eigenvector matrix and element 1 as
  the eigenvalue matrix, selects the column chosen by get_max_eigen_index,
  and returns the absolute value of the real part of each of its entries,
  ordered by team index."
  [result_file start_year end_year, playing_countries]
  (let [team-count   (count playing_countries)
        matrix       (to_matrix result_file start_year end_year playing_countries)
        decomposed   (Eigen/eigenvectors matrix)
        vectors      (get decomposed 0)
        values       (get decomposed 1)
        top-index    (get_max_eigen_index values team-count)
        top-column   (.getColumn vectors top-index)]
    (map (fn [team-id] (abs (.getReal top-column team-id)))
         (range team-count))))
(defn compute_ranks
  "Returns a map from country name to its computed score for matches played
  between `start_year` and `end_year` (inclusive) in `result_file`.
  Returns an empty map when no country played in that range."
  [result_file start_year end_year]
  (let [index-by-country (playing_countries result_file start_year end_year)]
    (if (seq index-by-country)
      ;; sorted names line up with the index order assigned by playing_countries
      (zipmap (sort (keys index-by-country))
              (compute_scores_by_teams result_file start_year end_year index-by-country))
      {})))
(defn plot-scores
  "Plots one time series per entry of `countries`.
  `data` maps a year to a map of country name -> score. For every year, in
  ascending order, each country's score is added to its series, using 0
  when the country has no entry for that year. All series are then put
  into a TimeSeriesCollection, in `countries` order, and handed to
  plot-time-series-collection with axis labels \"Year\" and \"Score\".
  Returns whatever plot-time-series-collection returns."
  [data]
  (let [;; one [name series] pair per country, built eagerly and in
        ;; `countries` order, replacing ten hand-written series and the
        ;; twenty copy-pasted .add / .addSeries statements that used them
        series-by-country (vec (for [country countries]
                                 [country (TimeSeries. country)]))]
    (doseq [year (sort (keys data))]
      (let [period    (Year. year)
            year-data (data year)]
        (doseq [[country series] series-by-country]
          (.add series period (get year-data country 0)))))
    (let [collection (TimeSeriesCollection.)]
      (doseq [[_ series] series-by-country]
        (.addSeries collection series))
      (plot-time-series-collection collection "Year" "Score"))))
;; Script entry point, run when the file is loaded: for every year from
;; 1890 to 2010, rank the teams over the window [year - 3, year] and plot
;; the resulting scores.
(let [results-file "/Users/karthik/workspace/clojure_matlab/td"
      years        (range 1890 2011)
      ranks-for    (fn [year] (compute_ranks results-file (- year 3) year))]
  (plot-scores (zipmap years (map ranks-for years))))
;(compute_ranks "/Users/karthik/workspace/clojure_matlab/td" 2007 2011)
April 2005 May 2005 August 2005 September 2005 January 2006 February 2006 November 2006 September 2008 March 2010 April 2010 October 2010
Subscribe to Posts [Atom]