// Fall 2024 project images
import nbastatspredictorfig1 from "../images/f24-projects/nba_stats_predictor_fig1.png";
// NOTE(review): Figure 2 must load its own asset rather than a second copy of fig1;
// this assumes nba_stats_predictor_fig2.png sits next to fig1 — confirm it is checked in.
import nbastatspredictorfig2 from "../images/f24-projects/nba_stats_predictor_fig2.png";
import f1fig1 from "../images/f24-projects/f1_fig1.PNG";
import f1fig2 from "../images/f24-projects/f1_fig2.png";

// Winter 2024 project images
import nflW24fig1 from "../images/w24-projects/nfl_ingame_win_predictor_fig1.png";
import cricketfig1 from "../images/w24-projects/cricket_fig1.png";
import cricketfig2 from "../images/w24-projects/cricket_fig2.png";
import marchmadnessfig1 from "../images/w24-projects/march_madness_w24_fig1.png";
import marchmadnessfig2 from "../images/w24-projects/march_madness_w24_fig2.png";
import nhafafig1 from "../images/w24-projects/nhl_fa_fig1.png";
import nhafafig2 from "../images/w24-projects/nhl_fa_fig2.png";
import eurosfig1 from "../images/w24-projects/euros_fig1.png";
import nbashootingfig1 from "../images/w24-projects/nba_shooting_fig1.jpg";
import nbashootingfig2 from "../images/w24-projects/nba_shooting_fig2.jpg";
import nfldraftfig1 from "../images/w24-projects/nfldraft_fig1.png";
import nfldraftfig2 from "../images/w24-projects/nfldraft_fig2.png";
import nfldraftfig3 from "../images/w24-projects/nfldraft_fig3.png";

// march madness images
import seaborn from "../images/march-madness/seabornPrintOut.png";
import lossGraph from "../images/march-madness/allLoss.png";

//soccer images
import epl_transfers_fig1 from "../images/soccer/revamp_epl_fig1.png";
import epl_transfers_fig2 from "../images/soccer/revamp_epl_fig2.png";
// import topTransfers from "../images/soccer/topTransfers.png";
// import transferScores from "../images/soccer/transferScores.png";

// betting images
import sportbooks from "../images/betting/sportbooks.png";

// wbb images
// import wbbFeature from "../images/wbb/feature-importance.png";
import paa from "../images/wbb/PAA.png";
import win from "../images/wbb/win-probability.png";

// football images
import feature from "../images/football/feature-importance.png";
import table from "../images/football/yes-no.png";

// daily fantasy images
import chart from "../images/daily-fantasy/fantasy-chart.png";

// hockey images
import hockeyTable from "../images/hockey/michigan-players-table.PNG";
import model from "../images/hockey/model-testing.PNG";
// player cards
import hockeyBellCurve from "../images/hockey/player-card/hockey-bell-curve.png";
import hockeyPlayerScores from "../images/hockey/player-card/hockey-player-scores.png";
// player projections
import hockeyScatterplot from "../images/hockey/player-projection/ncaa-scatter.png";
import hockeyStats from "../images/hockey/player-projection/ncaa-player-stats.png";

// idx images
import powerRankings from "../images/idx/power_rankings.png";
import teamPic from "../images/idx/team_photo.PNG";

// nba images
import nbaPlayerTable from "../images/basketball/nba-matchup-fig1.png";
import nbaGamesScatterplot from "../images/basketball/nba-matchup-fig2.png";

// nfl images
import nflProjections from "../images/football/nfl-projections.png";
import nflGraphs from "../images/football/nfl-graphs.png";

// Important Note: There must be at least as many paragraphs as there are images for each project.

const projects = [
  {
    title: "F1 Pure Skill Index",
    authors: ["Alex Quintero, Aaron Li, Zach Levine"],
    text: [`In Formula 1, race results and championship wins are often heavily influenced by car performance, making it 
    difficult to measure a driver's true skill. Even an average driver in a dominant car can win championships, while great 
    drivers in slower cars may never get the recognition they deserve. This project aimed to answer a fundamental question: 
    who is the best F1 driver if all cars were equal? To find out, we focused on the only fair comparison available — teammates 
    racing in the same machinery — and developed an ELO-based ranking system to measure driver skill without car 
    performance bias. \n\n
    Our model analyzes every race in F1 history, updating driver ratings after each head-to-head comparison between teammates. 
    Drivers start with a base ELO rating of 1500, which adjusts based on their performance relative to expectations. For example, 
    if a driver with a higher ELO rating finishes far ahead of their teammate, their rating increases more significantly. Conversely, 
    underperformance leads to a rating decrease. The system accounts for differences in finishing positions, making larger gaps 
    between teammates result in bigger ELO shifts. This methodology allows us to create a dynamic, race-by-race ranking of driver 
    skill that evolves over time. \n\n
    The results were fascinating and somewhat unexpected: Fernando Alonso consistently ranked as the best driver of the modern era. 
    Despite winning fewer championships than other legends, Alonso dominated the ELO rankings for 291 races (around 15 years), a 
    stretch unmatched by any other driver. He also achieved the highest peak rating, with 2516.5 points, far ahead of Verstappen 
    (2104.6) and Raikkonen (1806.4). One of Alonso’s most impressive performances was against Felipe Massa during their time at 
    Ferrari from 2010 to 2014, where Alonso finished ahead in 65 out of 76 races. This rivalry earned him 596 ELO points — the 
    largest gain from a single teammate battle in our dataset. \n\n
    The project highlights that while championships require a great car, true driver skill can be measured by outperforming a 
    teammate in equal machinery. This ELO-based system provides a fresh perspective on driver performance, allowing us to appreciate 
    drivers who excelled regardless of their car’s capabilities. \n\n
    To learn more about this project, <a href="https://thankful-package-80c.notion.site/WSA-17b7215213bc8026a3b4f77f8ba547b9" target="_blank" rel="noopener noreferrer" style="color:red;">click here</a>.`],
    images: [
      {
        image: f1fig1,
        caption: "Figure 1 - Evolution of Elo ratings for F1's top drivers over the past 25 years."
      },
      {
        image: f1fig2,
        caption: "Figure 2 - Evolution of ELO ratings in the Alonso vs. Massa battle. Shaded area marks their time as teammates.."
      },
    ],
    route: "f1-pure-skill-index"
  },
  {
    title: "NBA Stats Predictor",
    authors: ["Dario Hoxha, TJ Carr, Michael Esparza"],
    text: [`In the Fall 2024 semester, we worked on the development of the NBA Stats Predictor. It's aim was to create an 
      interactive model predicting NBA players stats in the 2024-2025 NBA season based on the minutes played that were inputted. 
      Our approach began with extensive data analysis, where we carefully examined player performances to identify patterns and 
      correlations between minutes played and individual stats. The predictive model proved effective in estimating player contributions, 
      offering possible valuable insights for coaches, analysts, and of course the fans!\n\n
      Using our coded algorithm, we generated insightful visualizations with Seaborn to analyze the data. These graphs highlighted trends 
      and relationships in the key metrics we used. Figure 1 shows the relationship between points and minutes played for all NBA players 
      that played in the 2024-2025 NBA season, with outliers depicted in red. As expected, players who played more minutes tended to score 
      more points. However, the relationship between assists and minutes played wasn't as strong as shown in Figure 2, where popular NBA 
      point guards from the 2024-2025 season are highlighted in red.`],
    images: [
      {
        image: nbastatspredictorfig1,
        caption: "Figure 1 - Points vs Minutes Played."
      },
      {
        image: nbastatspredictorfig2,
        caption: "Figure 2 - Assists vs Minutes Played."
      }
    ],
    route: "nba-stats-predictor"
  },
  // 2024 NFL Draft Predictor write-up: three "\n\n"-delimited paragraphs, three figures
  // (paragraph count must stay >= image count, per the note above `projects`).
  {
    title: "2024 NFL Draft Predictor",
    authors: ["Evan Chechak, Kushal Patel, Alex Abraham, Adam Chouman"],
    text: [`During the Spring 2024 Semester, our project team embarked on a comprehensive analysis of the NFL Draft 
    prospects, focusing specifically on quarterbacks and receivers. Our objective was to develop a predictive model 
    that accurately ranked these players based on a combination of college statistics, measurable attributes, combine 
    results, and professional scouting analytics. To construct our model, we web scraped data from a multitude of sources, 
    including college statistics databases, combine results, and scouting reports (Pro Football Focus, ESPN, 
    Daniel Jeremiah, etc).\n\n
    One major hurdle was the inconsistency in data availability; some prospects either did not attend the NFL Combine or 
    had incomplete college statistics due to injuries, which required us to normalize each data point to ensure comparability 
    across all players. We incorporated various metrics into our model, including traditional college performance indicators 
    such as passing yards, completion percentage, and receiving yards, as well as combine measurements like hand size and 
    40-yard dash times. A key challenge was determining the weight of each metric in our predictive formula. For instance, 
    we debated the relative importance of quarterback rating (QBR) versus combine statistics, and we had to carefully consider 
    how to weigh these factors given the different environments of combine performances versus pro days held at universities. 
    Another complexity was addressing the skill disparities between different college divisions. For example, a quarterback 
    from the Big 12 might face different competition compared to one from the Big Sky Conference. We incorporated strength 
    of schedule and opponent strength into our analysis to account for these variations, adjusting our model to reflect the 
    true skill level of each player.\n\n
    Our model's results were promising. Although we will need to wait for at least the 2024 NFL season to fully validate our 
    predictions, our rankings were notably close to the actual draft positions. Every quarterback and receiver we ranked was 
    drafted within two spots of our predicted position inside their position group, demonstrating the effectiveness of our 
    predictive approach. Some future goals for this project include expanding our model to include rankings for all position 
    groups, such as defensive players and special teams. We also plan to refine our methodology by weighting college statistics 
    based on team success, addressing factors such as the impact of poor quarterback play on receivers and the benefits of a 
    strong offensive line for quarterbacks. We look forward to revisiting this project in a few years to assess how well our 
    model performs in predicting NFL success compared to draft outcomes.`],
    images: [
      {
        image: nfldraftfig1,
        caption: "Figure 1 - Collected Player Data."
      },
      {
        image: nfldraftfig2,
        caption: "Figure 2 - Top 10 Quarterback Overalls."
      },
      {
        image: nfldraftfig3,
        caption: "Figure 3 - Top 10 Wide Receiver Overalls."
      }
    ],
  
    // URL slug for this project's page — presumably consumed by the router; confirm against caller.
    route: "nfl-draft-predictor"
  },
  {
    title: "NFL In-Game Win Predictor",
    authors: ["David McDermott, Connor O’Malley, Louis Simeone, Gaurab Adhikari, Dane Jorgensen"],
    text: [`We created a model to calculate the win probability of a specific NFL team at a given moment of any game. 
    The model considers the current score, the amount of time remaining, the current play's down and distance, 
    the position on the field, and each team's statistics in the current season. It makes an estimation for the likelihood 
    that the team calls a rush or pass play and calculates a distribution of percentages for the likelihood of reaching 
    every yard on the field. It then estimates the remainder of the drive and the remainder of the game to calculate 
    an expected points value for the rest of the game. By comparing the expected points of the two teams, it outputs 
    each team's win probability.\n\n
    The model can also be used to compare play-calling decisions, especially in third or fourth down decisions. 
    By calculating the expected points for a punt compared to a fourth down conversion attempt, for example, the model 
    can provide an estimate of how each decision affects the offense's chance of winning. It can also be applied to 
    rush/pass decisions, as separate expected yards distributions can be generates for each situation. With this functionality, 
    the model could be employed by a coach or coordinator to analyze their decisions and increase the 
    likelihood of making the correct call.`],
    images: [
      {
        image: nflW24fig1,
        caption: "Figure 1 - An example win probability graph."
      },
    ],
  
    route: "nfl-ingame-win-predictor"
  },
  {
    title: "ICC Men's T20 World Cup Simulator",
    authors: ["Padma Danturty, Ian Loree, Bryce Carson, Aryan Shah, Sam Dorfman, Maddie Coe"],
    text: [`In the winter 2024 semester, we worked on a model to simulate the results of the 2024 ICC Men’s T20 World Cup. 
      Our primary goals were to predict the outcome of all 55 matches, especially the overall winner, and to predict the 
      run difference between teams in each match based on per-player statistics. To accomplish this task, we utilized 
      ball-by-ball data of T20I matches from the past four years which had been collected by Cricsheet, and 
      gathered them into a master dataframe for analysis using the Pandas library. We calculated runs per bowl and 
      runs conceded per bowl for each player on a World Cup team using this dataframe, adjusting by the 
      current T20I team rankings to account for differences in skill levels among teams.\n\n
      We then used these per-player statistics as point estimates for which to bound a simple uniform distribution, 
      drawing a random value from that distribution for each bowler and batter matchup. With the roster for each 
      World Cup team made up of three bowlers, three batters, and one all-rounder, each team’s score in a match is 
      aggregated from drawn run values across 96 bowls six for each bowler-batter matchup. Following the format 
      of the T20 World Cup, each simulation consists of 55 matches separated into a Group Stage, Super 8 Stage, 
      and a Knockout stage. In each simulation, we recorded the winner of each match, the points earned by 
      each team (2 for a win), total runs scored, total runs conceded, and Net Run Rate for further analysis.\n\n
      Finally, we ran 1200 simulations to predict the expected placements and overall tournament bracket, 
      and visualized our work for presentation. We look forward to watching the World Cup, 
      comparing our results with the actual tournament, and potentially expanding on our 
      model with this new data in the future!`],
    images: [
      {
        image: cricketfig1,
        caption: "Figure 1 - T20I ball-by-ball data from Cricsheet."
      },
      {
        image: cricketfig2,
        caption: "Figure 2 - Predictions for New Zealand."
      }
    ],
  
    route: "cricket-wc-simulator"
  },
  {
    title: "2024 March Madness Bracket Predictor",
    authors: ["Vaelone Elankumaran, Smayan Ranjan, Nathan D'Souza, Steven Lan"],
    text: [`In 2024 Winter, we worked on a March Madness bracket predictor by creating a list of all 
      the teams in the tournament ranked from best to worst based on statistics we acquired during web-scraping. 
      We scraped statistics including Defensive Efficiency, assist/turnover ratio, strength of schedule rating, 
      and 85 others from every NCAA team dating back to the March Madness tournament in 2007. Then we scraped 
      the amount of wins each team had in the tournament. As we scraped we pushed all of this data into SQL, 
      resulting in over 80000 data points in our table. Finally we combined these stats into a linear regression 
      to predict a win total for the teams in 2024.\n\n
      Our regression involved using Ordinary Least Squares, a multinomial form of linear regression using 
      scikit learn to provide coefficients for each of those relevant statistics. Using these coefficients, along 
      with the statistics for the teams in the 2024 tournament, we calculated projected win totals for every team 
      that was in the tournament. We filled out the bracket using these projections, which resulted in us scoring 
      1140 points, good enough for 86th percentile on all of ESPN. However, when using the same algorithm on the 
      2022 and 2023 bracket we had far less success, scoring in the neighborhood of the 40th percentile both years. 
      Something of note though is that for all three years it was able to correctly identify the odds on favorite, 
      with our top teams being Uconn, Houston, and Gonzaga for the last three years. March madness is known for 
      being unpredictable, so it makes sense overall we weren't very successful. But, it is positive at least that 
      we were able to match the results of the bookmakers for all three of those years. All in all, this 
      project served as a solid learning experience and foundation in knowledge for python, SQL, and utilizing 
      various python libraries in conjunction to conduct machine learning.`],
    images: [
      {
        image: marchmadnessfig1,
        caption: "Figure 1 - Projected win totals for the 2024 Tournament."
      },
      {
        image: marchmadnessfig2,
        caption: "Figure 2 - Statistics for the various teams in SQL."
      }
    ],
  
    route: "march-madness-w24-predictor"
  },
  {
    title: "NHL Free Agency Analysis",
    authors: ["Dylan Pham, Trevor Hogland"],
    text: [`Utilizing a data-driven algorithm, our project aims to match teams with the ideal free agent 
      acquisitions, emphasizing player performance through a "Player Score" statistic and the context of 
      cap space, with Moneypuck and CapFriendly as primary data sources. For example, the algorithm proposes 
      the Detroit Red Wings sign defenseman Brady Skjei, taking advantage of their available cap space and 
      improving their defense’s average Player Score.\n\n
      However, our project holds some limitations: the exclusion of goalie evaluations, not accounting for plans
      past one season, and the likelihood of altering cap space and salary values. Designed to be most effective 
      right when free agency begins, the algorithm could benefit from improvements such as integrating future 
      salary predictions, including predictions for resigning current players, broadening recommendations 
      beyond single-player solutions, and refining the automation processes to increase the credibility of its results.`],
    images: [
      {
        image:  nhafafig1,
        caption: "Figure 1 - MoneyPuck table we scraped for Player Scores and names of players."
      },
      {
        image:  nhafafig2,
        caption: "Figure 2 - Teams' player scores before and after signing our recommended player."
      }
    ],
  
    route: "nhl-free-agency"
  },
  {
    title: "UEFA Euro 2024 Predictor",
    authors: ["Nicholas Amalraj, Ayan Nair, Varad Chapalgaonkar, Joshua Bodapati"],
    text: [`In the Winter 2024 Semester, we decided to create a machine-learning model that successfully predicted the winner 
      of the UEFA 2024 European Championship coming up this June. By aggregating dozens of metrics from multiple previous 
      competitions for each country, we used a Random Forest Classifier to use these statistics to simulate full tournaments to 
      determine the next European Champion.\n\n
      Although we won't know the accuracy of our model for another two months, we can confidently say our team has gotten a very 
      strong introduction to important tech skills such as web scraping, handling databases, extracting important information 
      from CSV files, and using that data to train machine learning models. We had a blast making this project, and are confident 
      that it will be able to successfully be able to predict the correct matchups, and ultimately, the winner of the entire competition.`],
    images: [
      {
        image: eurosfig1,
        caption: "Figure 1 - Average of the difference in expected goals versus actual goals over multiple competitions for each country."
      },
    ],
    route: "euro2024-predictor"
  },
  {
    title: "NBA Shooting Analysis",
    authors: ["Ray Steensland"],
    text: [`In the winter of 2024, we developed a project to analyze the NBA shooting statistics that correlated most to team success. 
      Intrigued by the analytics-driven offensive strategy of the D'Antoni-Harden Houston Rockets teams, our team set out to determine 
      what shooting metrics most contributed to offensive efficiency through regression analysis. With the help of the Beautiful Soup Library, 
      we web scraped team data from Basketball-Reference.com of the 2022-23 NBA regular season. We compiled these numbers into data frames 
      using Pandas. These data frames included each team's attempts, makes, and percentages for two-pointers, three-pointers, and field goals, 
      respectively. In addition, we gathered metrics of success like offensive rating, points per game, and wins as our dependent variables. \n\n
      After running each shooting metric through regression analysis using the Scikit-Learn library, the strongest indicator of a successful 
      offensive rating was three-point percentage. To further analyze these metrics, our team used the win totals to divide the dataset into 
      two groups: playoff and lottery teams. While the strategy of pure volume 3-point shooting alone did not strongly indicate a high offensive 
      rating, our data revealed a significant trend in the shot selection between postseason and lottery teams. After taking the average attempts 
      of each group, there was an extra 7% gap in favor of two-point attempts over threes for the losing squads. Despite presumptively trailing 
      in games and needing more threes, the less successful team's shared the faulty strategy of not shooting enough of them. Overall, 
      this projects demonstrates how basic regression analysis and grouping can provide teams with points of emphasis for decisions on 
      offensive gameplan and personnel. `],
    images: [
      {
        image: nbashootingfig1,
        caption: "Figure 1 - Correlation between 3-pointer percentage and offensive efficiency."
      },
      {
        image: nbashootingfig2,
        caption: "Figure 1 - Disparity in shot selection between playoff and nonplayoff teams."
      },
    ],
    route: "nba-shooting"
  },
  {
    title: "NCAA Hockey Player Projections",
    authors: [],
    text: [`During the Fall 2023 Semester, two of our project teams had the opportunity to work with 
    Michigan’s Men’s Hockey Varsity Team with the purpose of creating useful models in order to easier 
    evaluate players. The main goal of this project team was to create an equivalency rating that estimated 
    how a player playing in the United States Hockey League (USHL), the premier junior hockey league 
    in the United States, would translate to the National Collegiate Athletic Association Hockey (NCAA Hockey). 
    In short, a number would be produced to effectively explain how much a skater in the USHL is 
    worth in the NCAA, which would specifically help the University with comparing players across 
    different leagues. Through the usage of EliteProspects, we were able to compile the statistics 
    of almost 9000 players that had played in the NCAA dating back to 2003 (the year the playoff 
    bracket expanded), as well as over 5000 players who had played in the USHL during that same span.\n\n
    From these expansive lists, we were able to see which players had performed in both leagues, 
    which turned out to be roughly 3000 players, and directly compare their statistics between the 
    two leagues. From here, in order to find a finalized metric, we took inspiration from Patrick 
    Bacon’s formula for finding an NHL equivalency score, which compiled statistics from a variety 
    of professional leagues and compared them to the National Hockey League. By creating a scaling 
    factor that weighted each player based on their total number of games played against an average 
    number of players, we were able to more accurately depict a final output. However, with the 
    limited amount of statistics that we had access to for each individual player (mainly points 
    per game), it was difficult to fine tune the formula like other projects where we might have 
    access to over 100 data points per subject. The final computation that we completed emitted 
    an 0.833 score, which means that 1 point in the USHL is worth 0.833 points in the NCAA. 
    Additionally, we attempted to form an equation that predicted how a specific player would 
    translate from the USHL to the NCAA, which is graphed in the NCAA predictor graph. This 
    equation proved to produce a mean squared error of 0.048, which is the difference between a 
    player’s actual points per game in the NCAA compared to their expected score. \n\n
    Overall, these two findings helped the hockey team create a defined comparison between the USHL and 
    the NCAA and help get a grasp for what a player might produce in the NCAA based upon his 
    USHL statistics. In the future, we hope to expand this project to include goalie statistics 
    and compare other leagues to the NCAA other than just the USHL.`],
    images: [
      {
        image: hockeyStats,
        caption: "Figure 1 - Player data from EliteProspects"
      },
      {
        image: hockeyScatterplot,
        caption: "Figure 2 - Scatterplot displaying how well the predictor did against actual data"
      }
    ],
    route: "hockey-player-projections"
  },
  {
    title: "NFL Game Predictor",
    authors: [],
    text: [`In fall 2023, we developed a machine learning model that generates an accurate betting 
    spread for every future NFL game. Using the Beautiful Soup library, members of this project team 
    scraped hundreds of metrics from Football Reference to obtain a comprehensive team profile of 
    every NFL franchise. Additionally, the team exhibited their expertise of Pandas by merging these 
    team profiles into a master data frame that contained every matchup since the 2016 season. 
    This 7 years sample size allowed our team to build an organized database that was brilliantly 
    formatted to build a TensorFlow regression program. \n\n
    This deep neural network took into account a plethora of stats, from simple metrics such as 
    passing touchdowns and turnovers to advanced analytics such as average depth of target and blitz 
    percentage. By manipulating the model’s dropout, epochs, kernel initialization, learning rate, and layers, 
    we were able to minimize our training loss and successfully produce realistic output for every future NFL 
    game. We quantified the success of our model by tracking the rate it predicts the outcome to a margin of 1 
    score and the odds of beating the Vegas sportsbooks. From weeks 10 to 18, our model peaked with predicting 
    11/14 matchups within 1 score of the actual result. Additionally, we were accustomed to about a 
    58% success rate in beating Vegas odds, with our best week winning 14/16. Overall, this project 
    showcases the power of machine learning in predicting NFL games and its potential to offer 
    valuable insights into projected standings and other predictive metrics in the realm of sports analytics.`],
    images: [
      {
        image: nflProjections,
        caption: "Figure 1 - Predicted outcomes for Week 10 and picks for week 16 of 2023 season"
      },
      {
        image: nflGraphs,
        caption: "Figure 2 - Left: Plot of the loss function during model training. Right: Model performance weeks 10 - 18"
      }
    ],
    route: "nfl-game-predictor"
  },
  {
    title: "NBA Team Matchup Simulator",
    authors: [],
    text: [`During the 2023 Fall, we worked on the development of the NBA Team Matchup Simulator. 
    Its primary objective is to predict the outcomes of NBA games and offer a unique perspective 
    on how individual players impact their teams during games. Our approach to this project was to 
    simulate these games one play at a time, emulating the play-by-play that would happen in a real game. 
    The goal was to have a program that was able to predict the win shares between two teams of 
    five players each. Our project began with an extensive data collection phase, where we gathered 
    over 400,000 data points from 1,000+ games of the 2021-2022 NBA season. This data, meticulously 
    scraped from Basketball Reference and official NBA Gamebooks, provided the foundation for our 
    analysis. With this dataset we assessed and quantified individual player impact, their contribution 
    to team performance, and their influence on opponents' gameplay. A key step in our process was 
    creating a league average player using this data which allowed us to normalize the performance of all 
    other players against this benchmark. Using these normalized player profiles, we simulated 
    our games through a refined Monte Carlo simulation process. We compiled a comprehensive list of potential 
    plays and involved players. \n\n
    By applying the normalized stats, we assigned probabilities to each 
    player's involvement in the next play that were then randomly selected. This approach allowed us 
    to simulate a one complete game. By repeatedly running these simulations, we could generate  
    statistically significant win percentages for two teams. The project proved to be quite accurate, 
    successfully predicting 7 out of 8 matchups in the 2022 playoffs. Moreover, our model facilitated 
    the simulation of hypothetical games and matchups involving impossible lineups. Despite its success, 
    we recognize that there are areas for further improvement in our simulation model.`],
    images: [
      {
        image: nbaPlayerTable,
        caption: "Figure 1 - An example of the data we collected. Note that for all plays, we care not just about the players directly involved but rather all the players on the court",
      },
      {
        image: nbaGamesScatterplot,
        caption: "Figure 2 - Example matchup scores over 5000 simulated games between MIA and MIL. This game did not happen during the 2022 playoffs, but MIA is projected to win this matchup about 80 percent of the time"
      }
    ],
    route: "nba-matchup-simulator"
  },
  {
    title: "Michigan Hockey Player Cards",
    authors: [],
    text: [`In the fall of 2023, we had the privilege of working with the Michigan Men’s Hockey 
    team to create different scores for hockey players. Our project was to be able to combine the 
    statistics that they regularly use and work with to create a model to determine each player’s 
    production, offense, defense, and transition scores. Those four scores were then combined and 
    weighed differently based on position to create an overall score for each player. The overall 
    goal for this project was to make an easier way for coaches and other personnel to be able to 
    have an easy way to look at the data for each player. In combination with the work done by 
    Michigan Men’s Hockey director of analytics Anthony Ciatti and his team, we were able to coordinate 
    which statistics the Michigan analytics team focuses on when looking at the different skills 
    in the game. For example, while production is the traditional goals, assists, and points, there 
    are others like shots, successful pass to slot for while on ice, PDP/20, and OGP/20 that make 
    up the score for offense. Using Python libraries like xgBoost and Scikit-Learn, these stats 
    were then able to be trained against the already-calculated Expected Goals For WOI (While On Ice), 
    Expected Goals Against WOI, and a myriad of different rates that were combined to train the 
    transition score. \n\n
    Our model then used this training to implement an algorithm to be applied to 
    each player, being able to calculate their respective scores. We were then able to graph the 
    results, making sure a nice bell curve was the distribution of scores as a way to keep the model 
    in check. In order to determine their overall score, the previously calculated production and 
    offense scores were weighted higher for forwards, while the defense and transition scores were 
    weighted higher. These scores then make it incredibly easy to see what kind of the player the 
    coaches are looking at. For example, it is easy to tell who a high-scoring forward is, with 
    high scores in both production and offense, while offensive defensemen and defensive forwards 
    are easy to determine as well. Thus, it makes it simple for coaches to glance at the scores and 
    help with their decision-making.`],
    images: [
      {
        image: hockeyBellCurve,
        caption: "Figure 1 - Distribution of the offense scores, displaying general bell curve (players not played make up highest frequency)"
      },
      {
        image: hockeyPlayerScores,
        caption: "Figure 2 - A sample forward player with higher scores in production and offense, lower scores in transition and defense"
      },
    ],
    route: "hockey-player-cards"
  },
  // Project entry: NCAA March Madness Bracket Predictor (2023).
  // `text` is a one-element array holding a single template literal; the "\n\n"
  // escape separates the scraping paragraph from the modeling paragraph
  // (presumably honored by whatever renders this entry — confirm against the consumer).
  // `images` uses the `seaborn` and `lossGraph` bindings imported from
  // "../images/march-madness/" at the top of this file; the captions line up with
  // the "(Figure 1)" / "(Figure 2)" references inside `text`.
  // `route` is presumably the URL slug of the project page.
  {
    title: "NCAA March Madness Bracket Predictor",
    authors: [],
    text: [`In 2023, we tackled a project that attempted to predict the unpredictable: March Madness. 
    The aim of the project was to develop a scalable model that could accurately predict the outcome 
    of the NCAA Division I Men’s Basketball Tournament based on both the 2022-2023 regular season data 
    as well as the past seventeen tournaments dating back to 2005. Members of the web scraping team 
    demonstrated their mastery of the BeautifulSoup library by gathering data from a multitude of 
    credible sources, including Basketball Reference, KenPom, and RealGM. We utilized these websites 
    to attain over 100 metrics, scaling from simple statistics such as field goal percentage to advanced 
    statistics including team tendencies and efficiency margins. Next, we standardized the statistics 
    to include thousands of historical and current teams in a MySQL database. \n\n
    Using TensorFlow and a variety of other Python libraries, the analytics team first investigated the 
    relationships between certain statistics using Seaborn and NumPy (Figure 1), and then created a sequential, 
    deep neural network of dense nodal layers that power ranked the teams by their postseason potential. This 
    analysis was further optimized through the adjustment of a variety of hyperparameters, including but 
    not limited to: dropout, regularizers, kernel initialization and regularization, optimizers, 
    normalization layers, learning rate, layer activation, training epochs, and batch size in order 
    to minimize loss (Figure 2). The output was then interpreted into head to head matchups and formatted 
    into a head to head bracket that finished in the 84th percentile in the country. Then we ran 
    the model on past years to check its historical performance and began to design improvements to the 
    model. Overall, this project highlighted the potential of machine learning and web scraping in sports 
    analytics and provided a valuable insight into the performance of college basketball teams in the 
    2022-2023 NCAA tournament.`],
    images: [
      {
        image: seaborn,
        caption: "Figure 1 - Seaborn pairplot output for data inspection"
      },
      {
        image: lossGraph,
        caption: "Figure 2 - Both stages of training in an absolute loss graph"
      },
    ],
    route: "march-madness"
  },
  {
    title: "Soccer Transfer Market Analysis",
    authors: ["Cole Fernandes, Olaf Dsouza, Zack Eisman, Maddie Coe, Louis Simeone, Philip Churchley, Sherry Zhang, Youngha Cho"],
    text: [`In 2023, we created a model to analyze the soccer transfer market. Our goal was to use soccer data to determine 
      the impact that a player's transfer has had on their team. Our model analyzed data from the past 5 premier league seasons. 
      The project scraping sub-team's primary goal was to obtain data for the analytics sub-team to use. For each player, it was 
      necessary to scrape simple statistics such as goals and assists as well as advanced stats like xG and progressive carries. 
      In order to accomplish this, the scraping team used the python library BeautifulSoup to scrape various fbref.com sites as 
      well as transfermarkt.com. Once the data was collected, it was stored in a shared central location using MySQL. 
      The final dataframe consisted over over 1,600 players and over 40 values for each. \n\n
      Once this data was collected, the analytics sub-team worked to develop a score for each player based on their statistics 
      relative to the yearly positional average in the EPL. Statistics were weighted differently by position as each has 
      different aspects that are more important relative to the other positions. These scores were then aggregated to calculate 
      a season score for each team, which we used to create a predicted Premier League table for each season, as shown in Figure 1. 
      This model has resulted in an average magnitude of 2.18 places away in the EPL table over the 5 years. The analytics 
      sub-team also ranked individual transfers in different categories by analyzing transfer data and comparing the percentile 
      of each player's individual season score to the percentile of their transfer fee (Figure 1). Finally, this sub-team also 
      compared the Premier League teams based on their ability to gain squad value while limiting their amount of money spent on 
      transfers (Figure 2). These values were calculated by taking the sum of their transfer players' season scores and comparing 
      it to the log value of the amount of money spent in each season. The log value was necessary as there is a heavy right skew 
      in the Premier League club's transfer budgets. \n\n
      From this project, we could determine that clubs such as Brighton and Crystal Palace are very transfer-savvy, which 
      matches general intuition. Additionally, the model also suggests that the Premier League's most successful clubs in 
      recent years (Manchester City, Liverpool) do not rely heavily on transfers at all.\n\n
      To learn more about this project, <a href="https://drive.google.com/file/d/1koG901w0k3gkBrU5eN_OVa5f6ZoP7b4J/view?usp=sharing" target="_blank" rel="noopener noreferrer" style="color:red;">click here</a>.`],
    images: [
      {
        image: epl_transfers_fig1,
        caption: "Figure 1 - WSA Team Rankings vs Actual EPL Results and Top Transfers from the Season."
      },
      {
        image: epl_transfers_fig2,
        caption: "Figure 2 - EPL Transfer Window Performance by Club over the Last 5 Seasons."
      },
    ],
    route: "soccer-transfer-market"
  },
  // Project entry: idX Ski & Snowboard Power Rankings (2022).
  // `text` is a one-element array holding a single template literal; the "\n\n"
  // escape separates the two paragraphs (presumably honored by whatever renders
  // this entry — confirm against the consumer). `images` pairs each image binding
  // (expected to be imported near the top of this file) with its caption, and
  // `route` is presumably the URL slug of the project page.
  {
    title: "idX Ski & Snowboard Power Rankings",
    authors: [],
    text: [`In 2022, we collaborated with Infinite Degrees (idX) to develop a power rankings 
    system for professional skiers and snowboarders. Three subteams (webscraping, 
    API, and analytics) worked at the Desai Accelerator office alongside idX founder (and WSA founder) 
    Brendan Hart to develop the power rankings system (Figure 1). We scraped competition results (such as those from
    the Winter Olympics) from PDF files and competition websites and stored these in a comprehensive database. \n\n
    This data was then converted into easily accessible JSON files (unique player objects that stored ID, 
    discipline, attempts, etc.). We created API documentation and utilized idX's private API endpoints 
    to retrieve useful data to analyze. Each athlete's score is based on a collection of tricks/competitions 
    they participated in. We analyzed and assigned weights to different trick components (e.g. quad cork 1800 > 
    frontside 360) based on FIS scoring guidelines and normalized the sum of total tricks to generate the score. 
    Our rankings are integrated with the idX website to present the first public ski and snowboard 
    competition rankings (Figure 2).`],
    images: [
      {
        image: teamPic,
        caption: "Figure 1 - Team photo at the Desai Accelerator"
      },
      // Disabled image entry (scrape-to-JSON example). With it commented out, the
      // power-rankings image below is the second figure, and its caption and the
      // "(Figure 2)" reference in `text` are numbered accordingly.
      // {
      //   image: scrapeToJson,
      //   caption: "Figure 2 - Example of scraping competition results into a JSON file"
      // },
      {
        image: powerRankings,
        caption: "Figure 2 - Power rankings displayed on idX website (idx.style/rankings)"
      }
    ],
    route: "idx-power-rankings",
  },
  // Project entry: Michigan Hockey Project (started 2021).
  // `text` is a one-element array holding a single template literal; the "\n\n"
  // escape separates the database/modeling paragraph from the website paragraph
  // (presumably honored by whatever renders this entry — confirm against the consumer).
  // `images` pairs each image binding (expected to be imported near the top of this
  // file) with its caption; the captions line up with the "(Figure 1)" / "(Figure 2)"
  // references inside `text`. `route` is presumably the URL slug of the project page.
  {
    title: "Michigan Hockey Project",
    authors: [],
    text: [`In 2021, we initiated a project with the Michigan Men's NCAA Hockey Team. 
          Utilizing webscraper tools, we have gathered a comprehensive hockey database 
          to be used to create different models and graphs that the hockey team will use. 
          This encompassing database includes in-depth statistics on Michigan players since 
          2015 (Figure 1), as well as all coaches, player's boxsheets, and games played since 
          2014. We tested and developed many different models using this data to discover 
          relationships between different stats (Figure 2), such as the impact one stat has 
          on another, and whether this influence is significant to improving efficient game play. \n\n
          Currently, we are in the process of developing a website that allows Michigan coaches 
          and players on the hockey team to utilize our findings and data to have an advantage 
          over other teams in the NCAA Hockey league. This website will have one section 
          dedicated to displaying the stats of all current players of all teams in the NCAA 
          Hockey league with interactive graphs and charts. The other section of the website 
          will be a win predictor that analyzes the percent chance of Michigan winning against 
          another hockey team using machine learning (weights and biases) with the scraped data. 
          This section will also include additional insights that contribute towards the calculation 
          of the win percentage.`],
    images: [
      {
        image: hockeyTable,
        caption: "Figure 1 - A sample of the web-scraped data; this table displays the stats of Michigan players",
      },
      {
        image: model,
        caption: "Figure 2 - Model testing using ridge regression and random forest algorithm to find variable relationships",
      },
    ],
    route: "michigan-hockey",
  },
  // Project entry: Michigan WBB Win Predictor (fall 2019).
  // `text` is a one-element array holding a single template literal; the "\n\n"
  // escape separates the two paragraphs (presumably honored by whatever renders
  // this entry — confirm against the consumer). `images` uses the `paa` and `win`
  // bindings imported from "../images/wbb/" at the top of this file.
  // NOTE(review): `text` references only "(Figure 2)"; Figure 1 is never cited —
  // confirm whether a reference was intended.
  // `route` is presumably the URL slug of the project page.
  {
    title: "Michigan WBB Win Predictor",
    authors: [],
    text: [`Women’s college basketball is a sport that is yet to capitalize 
          on the capabilities of machine learning. At the start of the Fall semester 
          in 2019, Wolverine Sports Analytics undertook an analytics project with 
          the Michigan Women’s Basketball Team. While reporting to graduate manager 
          and former NBA G-League point guard, Harry Rafferty, Wolverine Sports Analytics 
          developed an interactive Excel application. The purpose of this application was 
          to allow managers, players, or coaches to easily spot the strengths and weaknesses 
          of opposing teams in addition to individual players. Every single college basketball 
          game is unique, but collectively, valuable insights can be made. \n\n
          Using this idea, we developed personalized logistic regression algorithms that would 
          determine winning strategies against a particular Big Ten women’s basketball team. For 
          example, an insight that our algorithm could produce might be: attaining a 3-point 
          field goal percentage one standard deviation above the mean at the cost of attaining 
          an offensive rebound percentage one standard deviation below the mean results in a 
          5% decrease in win-probability. Due to the fact that we developed a unique algorithm 
          for each Big Ten team, we were able to achieve very strong results such as a 95% 
          accuracy against Maryland over the past 4 seasons. Essentially, without knowing the 
          score of either team, the algorithm was able to predict the correct winner based on 
          inputs like turnover percentage, free-throw attempt rate, etc (Figure 2). In addition, 
          the team can easily test different rotations and determine predicted rotation success. 
          This was a great experience for WSA, and we anticipate future work with the women’s basketball team.`],
    images: [
      {
        image: paa,
        caption: "Figure 1 - Web-scraped data and color-coding indicating comparison to league average (red = below avg, green = above avg)",
      },
      { 
        image: win,
        caption: "Figure 2 - Calculated win probability for Michigan WBB given they perform to these statistics",
      },
    ],
    route: "michigan-wbb",
  },
  {
    title: "Michigan Football Recruiting",
    authors: [],
    text: [`The world of college recruiting continues to evolve into a potential A.I.-driven 
          competition. In 2018, Wolverine Sports Analytics had the opportunity to work with the 
          Michigan Football Team to try and discover a potential recruiting edge that they could 
          adopt. Under the guidance of J.T. Rogan, former Director of Communications & Operations 
          for the Head Coach, we created an algorithm for predicting the likelihood of a recruit 
          committing to Michigan using a brand new analytical approach. Nearly every high school 
          recruit uses Twitter as a means of making announcements regarding recruiting, so we decided 
          to incorporate self-made Twitter statistics (Figure 1) into our algorithm. Along with 
          recruit personal information, high school information, and official-offer and visit information, 
          our algorithm used variables such as percentage of tweets related to Michigan, percentage 
          of liked tweets related to Michigan, number of Michigan coaching figures followed on Twitter, 
          and more. We tested many forms of machine learning algorithms including random forest, L1 
          and L2 logistic regression, and extreme gradient boosting. \n\n
          Ultimately, we achieved our best 
          results using random forest algorithms which when tested on our dataset achieved an overall 
          accuracy of 90.5% while maintaining an individual-recruit accuracy of 54.28% (Figure 2). With 
          a deeper dataset and extraction of other social media data, we believe we could achieve higher 
          individual-recruit. Overall, this was a very challenging yet rewarding project for WSA as a 
          proof of concept for the Michigan Football Team.  2018, Wolverine Sports Analytics had the 
          opportunity to work with the Michigan Football Team to try and discover a potential recruiting 
          edge that they could adopt. Under the guidance of J.T. Rogan, former Director of Communications 
          & Operations for the Head Coach, we created an algorithm for predicting the likelihood of a 
          recruit committing to Michigan using a brand new analytical approach.`],
    images: [
      {
        image: feature,
        caption: "Figure 1 - Self-made Twitter features ranked by importance",
      },
      {
        image: table,
        caption: "Figure 2 - Final results based on our model",
      },
    ],
    route: "michigan-football",
  },
  {
    title: "FanDuel Lineup Generator",
    authors: [],
    text: [`In nearly every professional sport, daily fantasy competitions exist on platforms like Fanduel 
    and Draftkings (Figure 1). During the 2017-18 school year, Wolverine Sports Analytics decided to 
    create an advanced data pipeline and system of algorithms for optimizing NBA daily fantasy competition 
    submissions. Daily fantasy basketball consists of creating salary constrained lineups of 9 NBA 
    players; 2 players at the point guard, shooting guard, small forward, and power forward position,
     and 1 player at the center position. The better the NBA player (for example LeBron James) the 
     higher the salary which lowers your allotted salary able to be spent on other players (Figure 2). 
     A lineup’s fantasy score is calculated by summing the lineup’s points, rebounds, assists, steals, 
     and blocks. There are 2 main forms of daily fantasy basketball competition: tournaments and 50/50s. 
     In tournaments, thousands of competitors submit low-cost buy-in lineups (generally between $0.25 and $5) 
     with large payouts being awarded to the top lineups in the pool (payouts generally in the thousands of 
      dollars and given to the top 5-10 entries). In 50/50s, on the other hand, buy-ins vary by the specific 
      50/50 competition (generally as low as $1 to as high as $250), and a roughly 1.8x payout is given to 
      the top half of entries (a 20 entry pool would result in 10 winning entries and 10 losing entries). 
      From a game theory and data standpoint, the strategy for tournament entries is different from the 
      strategy for 50/50 entries due to the nature of the competitions. In a 50/50 competition, the only 
      goal is to be better than half of the pool’s entries because the payout is the same for all winning 
      entries. For this reason, you should aim to have a high degree of confidence in an entry’s fantasy 
      score along with having low variance players. When it comes to tournaments, however, almost every 
      entry will be a losing entry, so there is no benefit to creating a ‘safe’ lineup. \n\n
      The goal is to create a high variance lineup that has the possibility of being the top entry in the 
      tournament. The architecture behind this project consisted of locating data sources, retrieving data, storing 
      data, preparing and cleaning data, developing models and algorithms, backtesting results, scheduling 
      cron jobs, deploying models, and creating salary-optimized lineups for use. WSA club members learned 
      and utilized Python, R, HTML, CSS, and SQL throughout the entirety of the project including Python 
      libraries such as BeautifulSoup, Pandas, Scikit Learn, and Selenium. Scikit Learn algorithms used 
      included ridge and lasso regression, random forest decision trees, and neural networks. WSA also 
      had the opportunity to meet in person with University of Wisconsin-Madison statistics professor and 
      researcher Dr. Bob Wardrop. During the meeting, WSA was able to ask Dr. Wardrop further questions 
      on his research into the Simpson’s Paradox and the “Hot Hand” concept in basketball and how his 
      findings could be capitalized on in this project.`],
    images: [
      {
        image: sportbooks,
        caption: "Figure 1 - Online platforms with Daily Fantasy",
      },
      {
        image: chart,
        caption: "Figure 2 - Example lineup generated by our model",
      },
    ],
    route: "daily-fantasy",
  },
];

// Default export: the `projects` list assembled above, in declaration order.
export default projects;
