{ "news": [ { "date": "2025-06-07", "video_link": "https://www.youtube.com/watch?v=4CV-fB5EVJs", "twitter_text": "New Benchmark Results: Claude-sonnet-4 & Claude-opus-4 and Join Us in the Discord!", "twitter_link": "https://x.com/haoailab/status/1931438794756313530" }, { "date": "2025-06-03", "video_link": "https://www.youtube.com/watch?v=m6i9L6-pgu4", "twitter_text": "New Benchmark Results: How do top open-source models like Deepseek r1 & Qwen 3 perform on games?", "twitter_link": "https://x.com/haoailab/status/1929997363407708646" }, { "date": "2025-04-28", "video_link": "https://www.youtube.com/watch?v=OEQRhBKYxIE", "twitter_text": "Grok-3-mini-beta Joins the Battle: Outperforms Gemini 2.5 Flash, Challenges O3-mini Across Games — Full Grok-3-beta Power Yet to Come. 🚀", "twitter_link": "https://x.com/haoailab/status/1917309598861779021" }, { "date": "2025-04-24", "video_link": "https://www.youtube.com/watch?v=NB1-5aKV9v4", "twitter_text": "Zero-Shot AI Gaming Showdown: O3 Multi-Modal Might Sweeps Sokoban & 2048, Lands Top-2 in Phoenix Wright & Candy Crush", "twitter_link": "https://x.com/haoailab/status/1915464349558460422" }, { "date": "2025-04-15", "video_link": "https://www.youtube.com/watch?v=q8PMW870yp8", "twitter_text": "Ace Attorney AI Revolution: O1 & Gemini 2.5 Pro lead in courtroom reasoning, while GPT-4.1 matches older models. \nCost analysis reveals Gemini 2.5 Pro's 6-15x efficiency over O1.", "twitter_link": "https://x.com/haoailab/status/1912231343372812508" }, { "date": "2025-04-08", "video_link": "https://www.youtube.com/watch?v=yoEo2Bk7PGA", "twitter_text": "LLaMA 4 Maverick hacks traditional benchmarks but struggles with real gameplay—our transparent leaderboard exposes the new AI challenge.", "twitter_link": "https://x.com/haoailab/status/1909712259326394519" }, { "date": "2025-04-01", "video_link": "https://www.youtube.com/watch?v=uFVpNor7l_E", "twitter_text": "Google's Gemini 2.5 Pro redefines AI gameplay: its multi-modal edge outperforms o1 & Claude 3.7 in Sokoban.", "twitter_link": "https://x.com/haoailab/status/1907140718650704204" }, { "date": "2025-03-18", "video_link": "https://www.youtube.com/watch?v=b-Uyz3W4yIg", "twitter_text": "Candy Crush Saga's Hidden Complexity: Top AI Models Take the Challenge", "twitter_link": "https://x.com/haoailab/status/1902095369808601551" }, { "date": "2025-03-14", "video_link": "https://www.youtube.com/watch?v=3aYDCSa3AWI", "twitter_text": "2048 Mastery: Only Two AI Models Crack the Code to Surpass Random Play", "twitter_link": "https://x.com/haoailab/status/1900645722095317255" }, { "date": "2025-03-06", "video_link": "https://www.youtube.com/watch?v=59enV32MBUE", "twitter_text": "Sokoban Showdown: o3-mini Dominates by Reaching Level 4", "twitter_link": "https://x.com/haoailab/status/1897792946646421514" }, { "date": "2025-02-28", "video_link": "https://www.youtube.com/watch?v=nixMIJZYAgg", "twitter_text": "Super Mario AI Revolution: Claude-3.7 Sets Unprecedented Gameplay Benchmarks", "twitter_link": "https://x.com/haoailab/status/1895557913621795076" } ] }