Spaces:
Runtime error
Runtime error
Model Pair,loss p-value,unpermuted loss,permuted losses | |
meta-llama/Llama-2-7b-hf vs codellama/CodeLlama-7b-hf,0.01,2.3246312141418457,12.414887428283691,10.436723709106445,11.463930130004883,10.289920806884766,10.242432594299316,10.102094650268555,10.40760612487793,11.008111000061035,10.213240623474121,10.67473316192627,10.590666770935059,11.035821914672852,11.31821060180664,10.628593444824219,10.762565612792969,10.666658401489258,10.025702476501465,11.167806625366211,9.551054000854492,10.156463623046875,11.520145416259766,10.491896629333496,11.043285369873047,9.333309173583984,11.062390327453613,12.100906372070312,11.202133178710938,9.852943420410156,11.296211242675781,10.389328956604004,11.376590728759766,10.070525169372559,10.026528358459473,9.897621154785156,11.314379692077637,10.841796875,10.254968643188477,10.329296112060547,9.514544486999512,10.902949333190918,11.021820068359375,10.437745094299316,10.649450302124023,10.367554664611816,10.411920547485352,10.035419464111328,10.890546798706055,10.692540168762207,10.53729248046875,11.218603134155273,9.837448120117188,11.327438354492188,9.788040161132812,10.384784698486328,11.534051895141602,11.14419174194336,10.694536209106445,11.900223731994629,10.65000057220459,12.121938705444336,10.567901611328125,11.527382850646973,9.785840034484863,11.457867622375488,10.247855186462402,10.888710021972656,10.499954223632812,10.134185791015625,10.207050323486328,10.164628028869629,10.550536155700684,11.72325325012207,9.784914016723633,9.676228523254395,9.838054656982422,10.10443115234375,10.668523788452148,11.105070114135742,10.959217071533203,10.044930458068848,10.379700660705566,10.720422744750977,10.373221397399902,11.601597785949707,9.767145156860352,11.08609390258789,11.894039154052734,10.29943561553955,9.70090103149414,10.862321853637695,10.94301700592041,10.51127815246582,10.600202560424805,10.773964881896973,10.515543937683105,10.527175903320312,9.276633262634277,10.818120002746582,10.66445541381836,9.917793273925781 | |
meta-llama/Llama-2-7b-hf vs openlm-research/open_llama_7b,0.64,11.575419425964355,11.40134334564209,11.22384262084961,11.46405029296875,11.452263832092285,10.455619812011719,10.975679397583008,11.58855152130127,12.451857566833496,11.924081802368164,12.242836952209473,11.277565002441406,11.563451766967773,11.68265151977539,11.848937034606934,10.44631290435791,12.024474143981934,11.507471084594727,10.45692253112793,11.344988822937012,10.991713523864746,11.589402198791504,11.106851577758789,11.331841468811035,11.409880638122559,11.256386756896973,11.77776050567627,11.199036598205566,11.516610145568848,11.03821849822998,11.872330665588379,11.290637969970703,11.223627090454102,11.23520278930664,11.499801635742188,11.516365051269531,11.601478576660156,10.872085571289062,11.779101371765137,12.180392265319824,11.032282829284668,11.411971092224121,11.81676197052002,11.883353233337402,11.750810623168945,11.176957130432129,10.957067489624023,14.970661163330078,11.218901634216309,12.29248046875,11.31027603149414,11.960100173950195,11.291650772094727,11.711487770080566,12.310693740844727,11.598724365234375,11.308028221130371,11.46600341796875,11.952092170715332,11.185503959655762,11.173068046569824,11.149730682373047,11.459930419921875,11.234939575195312,11.293815612792969,11.118258476257324,12.10870361328125,12.251635551452637,11.076282501220703,11.664109230041504,12.398612976074219,11.496159553527832,11.82258415222168,11.470304489135742,11.340746879577637,12.212625503540039,10.980117797851562,11.57983684539795,11.1240873336792,11.654163360595703,11.260462760925293,11.391934394836426,10.89322566986084,11.501547813415527,11.8900785446167,10.93996524810791,10.885272979736328,11.528347969055176,11.503525733947754,11.81087589263916,11.610458374023438,11.825965881347656,11.274349212646484,10.811087608337402,11.338240623474121,11.545049667358398,11.304219245910645,11.655720710754395,11.276208877563477,10.926770210266113,10.414559364318848 | |
meta-llama/Llama-2-7b-hf vs huggyllama/llama-7b,0.99,12.2577486038208,11.713994979858398,11.829354286193848,11.17656421661377,10.935490608215332,10.614337921142578,11.733335494995117,10.55212116241455,11.617198944091797,11.02830982208252,11.103273391723633,10.98201847076416,11.652020454406738,11.325891494750977,11.472954750061035,11.740574836730957,11.530477523803711,11.882132530212402,11.853250503540039,11.228290557861328,11.346905708312988,11.387336730957031,11.696052551269531,11.597851753234863,10.276288032531738,10.484336853027344,11.387497901916504,11.903156280517578,10.768474578857422,11.013677597045898,11.241617202758789,11.207913398742676,11.054834365844727,11.4959716796875,10.328938484191895,11.011316299438477,11.860649108886719,10.733698844909668,11.433752059936523,11.57979965209961,10.878405570983887,10.85395336151123,11.535321235656738,11.479925155639648,11.269013404846191,10.643021583557129,10.89708423614502,10.569060325622559,10.77185344696045,11.806313514709473,11.702690124511719,11.08808708190918,10.627337455749512,11.021454811096191,11.430144309997559,10.929702758789062,11.454261779785156,11.20509147644043,10.65364933013916,11.217240333557129,11.659832000732422,10.883869171142578,11.713050842285156,10.736254692077637,11.047320365905762,11.248353004455566,11.70473861694336,10.898530006408691,10.070318222045898,10.314364433288574,10.933834075927734,10.347864151000977,11.01766586303711,10.836737632751465,10.664950370788574,10.768060684204102,10.58050537109375,10.815657615661621,10.808479309082031,11.686751365661621,10.925615310668945,11.23190975189209,11.179738998413086,11.049586296081543,10.853154182434082,11.299239158630371,12.247625350952148,10.916298866271973,11.287036895751953,11.025016784667969,11.59552001953125,11.489020347595215,10.638093948364258,11.174018859863281,10.918281555175781,11.737152099609375,10.530423164367676,10.45882797241211,10.129399299621582,11.413297653198242,11.797016143798828 | |
meta-llama/Llama-2-7b-hf vs lmsys/vicuna-7b-v1.5,0.01,1.927486538887024,11.821135520935059,11.198407173156738,9.731367111206055,10.24201774597168,10.990260124206543,11.304125785827637,11.099743843078613,12.419852256774902,12.37198543548584,11.15950870513916,11.141377449035645,11.544782638549805,12.145750999450684,12.45425033569336,11.949847221374512,11.726274490356445,10.694775581359863,11.242759704589844,10.818204879760742,12.982400894165039,10.319830894470215,12.521677017211914,12.44443130493164,13.007572174072266,10.896312713623047,12.76135540008545,12.305526733398438,10.95059871673584,11.4083890914917,12.70875358581543,12.232357025146484,12.533140182495117,10.787182807922363,10.945103645324707,11.467927932739258,10.401111602783203,10.44174861907959,12.008749008178711,11.72877311706543,11.764703750610352,9.260933876037598,9.660629272460938,10.801837921142578,10.224106788635254,10.381224632263184,9.673469543457031,11.380040168762207,11.231133460998535,19.4843807220459,11.832645416259766,11.092031478881836,10.962821960449219,9.960295677185059,10.633321762084961,10.486164093017578,10.409814834594727,11.982443809509277,9.964591026306152,12.886614799499512,10.435898780822754,12.111292839050293,10.96882152557373,11.224199295043945,11.8229341506958,12.327183723449707,10.941879272460938,10.337071418762207,10.541845321655273,11.256275177001953,12.46101188659668,11.085774421691895,11.33261489868164,11.308060646057129,11.747783660888672,10.688689231872559,11.49790096282959,10.940205574035645,8.749332427978516,12.804007530212402,11.88497543334961,11.572806358337402,9.896434783935547,11.37403392791748,10.527442932128906,10.58166790008545,10.856118202209473,11.154521942138672,10.329510688781738,11.657892227172852,11.611366271972656,11.373307228088379,11.529753684997559,11.080778121948242,9.911948204040527,11.649473190307617,11.339656829833984,11.394058227539062,10.780144691467285,10.756746292114258,12.14152717590332 | |
meta-llama/Llama-2-7b-hf vs EleutherAI/llemma_7b,0.01,2.2617459297180176,12.073643684387207,9.601665496826172,11.438493728637695,11.859432220458984,9.136279106140137,10.389586448669434,10.537793159484863,9.534730911254883,11.314827919006348,11.605558395385742,10.475652694702148,11.510196685791016,11.53796100616455,12.012444496154785,9.623738288879395,9.299081802368164,12.455397605895996,9.597661972045898,12.382262229919434,11.051375389099121,9.97256088256836,9.617574691772461,9.902551651000977,10.14933967590332,10.723257064819336,10.306427001953125,10.507966041564941,10.786173820495605,12.057255744934082,8.83625316619873,10.997522354125977,9.18514633178711,9.580232620239258,9.388571739196777,9.756895065307617,9.360857009887695,10.979372024536133,10.720354080200195,10.257851600646973,9.46591854095459,9.886110305786133,10.586426734924316,10.423218727111816,11.755844116210938,10.80079460144043,10.800479888916016,9.471284866333008,9.391319274902344,12.033135414123535,10.548360824584961,11.953831672668457,10.858678817749023,11.68787670135498,10.111590385437012,11.390238761901855,10.074946403503418,8.820842742919922,10.89544677734375,9.980045318603516,10.665379524230957,9.60218620300293,10.32979965209961,12.539587020874023,8.944647789001465,10.680426597595215,10.794387817382812,12.351435661315918,10.521456718444824,10.917708396911621,10.16922378540039,9.72276496887207,9.371663093566895,10.629088401794434,10.897933959960938,12.550519943237305,8.850178718566895,10.177017211914062,9.906770706176758,12.711698532104492,11.127090454101562,10.410646438598633,10.431379318237305,9.942227363586426,11.459847450256348,15.255708694458008,12.270158767700195,10.828147888183594,10.433170318603516,10.952369689941406,11.425644874572754,9.552434921264648,9.059112548828125,9.621358871459961,12.671890258789062,10.086618423461914,9.930680274963379,12.442190170288086,9.548700332641602,11.214447975158691,10.612310409545898 | |
meta-llama/Llama-2-7b-hf vs lmsys/vicuna-7b-v1.1,0.99,12.176732063293457,12.091742515563965,9.97877025604248,11.391814231872559,10.661109924316406,10.817087173461914,11.534388542175293,11.4345064163208,11.032309532165527,11.198960304260254,10.708231925964355,10.624578475952148,10.788887977600098,10.516521453857422,11.011672019958496,11.516682624816895,11.103180885314941,11.559279441833496,11.907734870910645,11.123976707458496,9.81065559387207,10.62118911743164,11.27763843536377,11.523125648498535,10.444096565246582,11.76767349243164,11.019610404968262,11.484188079833984,11.91457748413086,10.497208595275879,11.79175853729248,11.298368453979492,10.316926956176758,10.76264762878418,11.064068794250488,10.277680397033691,10.894306182861328,11.136940002441406,10.948762893676758,10.753281593322754,11.747068405151367,11.244501113891602,11.157686233520508,11.421767234802246,11.376803398132324,10.557845115661621,11.091096878051758,11.671372413635254,11.795584678649902,10.934099197387695,11.12752628326416,11.514236450195312,10.979425430297852,10.027077674865723,11.399852752685547,11.993349075317383,11.904010772705078,10.320328712463379,11.54886531829834,11.317293167114258,10.79334831237793,11.174019813537598,11.312468528747559,12.456009864807129,11.849678039550781,11.423567771911621,11.26967716217041,11.130739212036133,11.871220588684082,11.11894416809082,10.497808456420898,11.341180801391602,11.120772361755371,11.832477569580078,11.945394515991211,10.435187339782715,10.123815536499023,11.923513412475586,11.165726661682129,10.511265754699707,10.816417694091797,11.620299339294434,11.749600410461426,11.621185302734375,10.726395606994629,10.873162269592285,11.366233825683594,11.751174926757812,11.712872505187988,11.940488815307617,11.489510536193848,11.349868774414062,11.479663848876953,11.586658477783203,10.732288360595703,10.799344062805176,10.621288299560547,11.430079460144043,11.587478637695312,11.59907054901123,10.702455520629883 | |
meta-llama/Llama-2-7b-hf vs microsoft/Orca-2-7b,0.01,1.9139776229858398,11.634427070617676,11.600348472595215,11.60654067993164,12.126233100891113,12.646196365356445,12.883075714111328,12.256454467773438,12.270318984985352,12.459698677062988,10.608137130737305,12.05358600616455,11.529598236083984,10.475113868713379,11.170097351074219,11.863300323486328,10.970882415771484,10.59147834777832,12.343642234802246,12.012216567993164,11.054078102111816,10.566030502319336,11.191433906555176,11.72900390625,12.421318054199219,11.642358779907227,12.26218032836914,12.066629409790039,11.98330020904541,12.333147048950195,10.645333290100098,11.23974323272705,12.008330345153809,11.807231903076172,10.58747673034668,11.714077949523926,11.820113182067871,11.989992141723633,12.196735382080078,12.603166580200195,10.58354663848877,12.315600395202637,12.89755916595459,12.080608367919922,11.813095092773438,11.074599266052246,10.643448829650879,10.9359769821167,11.14858627319336,12.007444381713867,12.142953872680664,12.374232292175293,10.88144302368164,10.891057014465332,11.451530456542969,12.084410667419434,12.564109802246094,12.367560386657715,10.01160717010498,11.172256469726562,12.028122901916504,11.117118835449219,10.586153030395508,12.69567584991455,11.325706481933594,11.118499755859375,12.011213302612305,12.913552284240723,11.33607006072998,12.125946044921875,10.641644477844238,11.218090057373047,11.023408889770508,11.858912467956543,9.926396369934082,11.108742713928223,10.33670425415039,11.369423866271973,10.87497329711914,10.876391410827637,11.830551147460938,18.367767333984375,11.91503620147705,10.409882545471191,12.978384971618652,11.026498794555664,12.638835906982422,11.090004920959473,13.22242259979248,10.301204681396484,12.243881225585938,11.466031074523926,11.41460132598877,11.086315155029297,11.420076370239258,10.147997856140137,11.232333183288574,11.026019096374512,11.566559791564941,13.21921157836914,11.737604141235352 | |
meta-llama/Llama-2-7b-hf vs LLM360/Amber,0.11,9.658585548400879,9.505361557006836,11.505193710327148,9.863297462463379,10.549816131591797,10.601881980895996,9.804191589355469,9.992196083068848,10.386860847473145,10.168055534362793,10.143446922302246,11.450798988342285,10.739060401916504,10.874429702758789,11.022444725036621,11.62452507019043,11.448281288146973,10.981934547424316,10.17294692993164,9.73409652709961,10.862936973571777,10.203863143920898,9.555386543273926,11.31235122680664,9.649775505065918,9.874133110046387,10.310741424560547,10.610264778137207,11.861855506896973,11.109381675720215,10.608086585998535,10.293082237243652,10.586475372314453,9.161870956420898,10.675275802612305,9.941357612609863,10.5984525680542,9.234950065612793,9.805939674377441,10.848678588867188,10.375737190246582,11.113080024719238,10.478139877319336,10.219255447387695,9.84744930267334,9.266841888427734,11.306220054626465,11.001436233520508,9.924046516418457,10.07392406463623,10.432853698730469,11.701499938964844,9.725397109985352,9.526941299438477,11.007448196411133,10.04012680053711,11.11932373046875,11.325552940368652,10.447799682617188,10.715643882751465,9.796828269958496,10.892861366271973,10.44837474822998,10.71353816986084,10.633293151855469,10.90569019317627,10.542643547058105,11.243886947631836,11.541812896728516,11.15402889251709,10.407221794128418,11.446313858032227,10.677804946899414,10.178149223327637,10.385024070739746,11.192070960998535,9.855805397033691,10.154035568237305,11.104714393615723,10.019867897033691,10.577777862548828,9.540035247802734,11.262989044189453,9.886129379272461,9.649831771850586,10.185017585754395,9.09082317352295,10.29433822631836,9.811910629272461,10.254436492919922,11.52804946899414,10.116106986999512,10.657572746276855,9.819936752319336,10.43431282043457,10.864540100097656,10.484803199768066,12.004101753234863,11.187883377075195,10.061746597290039,11.245307922363281 | |
codellama/CodeLlama-7b-hf vs openlm-research/open_llama_7b,0.65,11.237561225891113,11.680026054382324,11.398880004882812,11.86768913269043,11.39999771118164,11.541326522827148,10.19598388671875,11.428864479064941,11.862239837646484,10.481870651245117,10.985757827758789,10.969829559326172,11.367602348327637,11.111282348632812,11.022823333740234,10.561090469360352,10.207883834838867,10.694847106933594,10.99021053314209,10.593587875366211,11.172009468078613,11.665566444396973,11.263614654541016,11.552456855773926,11.507055282592773,11.336711883544922,11.074647903442383,10.79491138458252,10.656916618347168,10.548526763916016,11.022597312927246,10.966035842895508,11.283071517944336,10.308378219604492,10.846853256225586,10.849564552307129,10.968177795410156,11.308629035949707,11.001319885253906,10.848873138427734,10.73318099975586,10.859678268432617,11.509345054626465,10.539803504943848,10.935742378234863,11.4722900390625,11.278949737548828,11.9168701171875,11.39941692352295,10.587061882019043,10.908806800842285,11.912999153137207,13.913973808288574,11.417015075683594,10.803816795349121,11.063304901123047,11.088665962219238,11.479676246643066,10.848278045654297,11.193321228027344,10.839192390441895,10.731671333312988,11.18687629699707,10.452286720275879,9.904109001159668,11.235068321228027,11.758543014526367,10.346553802490234,10.477143287658691,10.51911449432373,11.152212142944336,10.974753379821777,10.7435302734375,11.238974571228027,12.068460464477539,10.960865020751953,11.370020866394043,11.462693214416504,11.207311630249023,10.798894882202148,11.14282512664795,11.745903968811035,11.410775184631348,10.994516372680664,11.088298797607422,11.085737228393555,11.031476020812988,11.000276565551758,9.836112022399902,11.110745429992676,10.450307846069336,10.72428035736084,11.212058067321777,11.340717315673828,11.261013984680176,11.80449104309082,11.24316120147705,10.83240795135498,10.822101593017578,10.707273483276367,10.825920104980469 | |
codellama/CodeLlama-7b-hf vs huggyllama/llama-7b,0.32,10.771794319152832,10.501058578491211,10.56005859375,11.231363296508789,10.989171028137207,11.534804344177246,12.136800765991211,11.693639755249023,10.567752838134766,11.91177749633789,11.43637466430664,11.564026832580566,11.265721321105957,11.19032096862793,10.40317153930664,10.623876571655273,10.287189483642578,10.42343521118164,10.866487503051758,12.013888359069824,10.774230003356934,11.138988494873047,11.338595390319824,11.346685409545898,10.583052635192871,10.429323196411133,10.492464065551758,11.78402328491211,10.30309009552002,11.499597549438477,12.184760093688965,11.097319602966309,11.184183120727539,11.21827507019043,10.121459007263184,11.127937316894531,10.251218795776367,11.494721412658691,10.646721839904785,10.4236478805542,11.158930778503418,9.909791946411133,10.339156150817871,11.350455284118652,10.859237670898438,10.905741691589355,12.035804748535156,11.207295417785645,10.787891387939453,11.013189315795898,11.2416410446167,10.195667266845703,11.020384788513184,11.056377410888672,11.036890029907227,10.251171112060547,10.708823204040527,11.038761138916016,10.733219146728516,10.966094017028809,10.910711288452148,10.949577331542969,11.406147003173828,11.047470092773438,9.963555335998535,10.648523330688477,11.16801643371582,11.87838363647461,11.598986625671387,10.867388725280762,12.05274486541748,11.79859733581543,11.681252479553223,10.888261795043945,11.057225227355957,10.08951187133789,11.208805084228516,10.740156173706055,11.282068252563477,11.669149398803711,11.980530738830566,10.39877700805664,11.507721900939941,11.044110298156738,11.520370483398438,10.161965370178223,11.56179428100586,10.723697662353516,10.831975936889648,11.531084060668945,10.410531997680664,11.085836410522461,12.549727439880371,11.945355415344238,10.689722061157227,10.73282241821289,11.869728088378906,12.093964576721191,12.069504737854004,11.068297386169434,11.401714324951172 | |
codellama/CodeLlama-7b-hf vs lmsys/vicuna-7b-v1.5,0.01,2.302018404006958,11.172709465026855,12.183158874511719,10.882681846618652,9.767948150634766,24.42896842956543,10.943366050720215,11.75478744506836,15.235170364379883,9.498678207397461,10.326377868652344,10.187347412109375,10.607508659362793,10.732197761535645,9.70353889465332,10.948262214660645,9.788002967834473,10.9060640335083,10.285189628601074,10.656235694885254,11.046913146972656,10.868155479431152,10.477668762207031,9.764632225036621,9.605854034423828,9.753314018249512,10.699603080749512,9.823118209838867,10.523367881774902,9.758964538574219,9.701371192932129,10.879630088806152,11.618856430053711,13.337488174438477,9.767634391784668,11.983025550842285,10.607478141784668,10.280527114868164,10.127568244934082,12.975506782531738,11.579995155334473,11.203207015991211,10.376364707946777,11.423201560974121,11.209802627563477,9.359248161315918,12.826610565185547,10.373741149902344,9.53094482421875,10.389079093933105,9.205557823181152,10.689602851867676,10.414274215698242,11.192754745483398,10.581353187561035,10.557870864868164,12.383353233337402,10.521531105041504,9.70833969116211,11.450300216674805,10.21054458618164,11.530344009399414,10.224842071533203,10.37917709350586,11.495973587036133,10.469186782836914,9.77238941192627,9.884854316711426,10.01948356628418,9.838289260864258,11.130271911621094,10.648575782775879,10.440532684326172,10.789712905883789,11.31021785736084,10.711143493652344,10.09947681427002,11.175568580627441,9.295597076416016,9.529111862182617,11.034727096557617,9.88634204864502,10.545726776123047,10.429464340209961,10.972481727600098,9.342894554138184,11.735199928283691,9.926610946655273,10.638096809387207,11.533238410949707,11.10690975189209,11.147697448730469,10.123966217041016,9.807247161865234,9.888313293457031,10.178050994873047,10.791872024536133,10.62413501739502,9.422971725463867,9.87336540222168,10.158583641052246 | |
codellama/CodeLlama-7b-hf vs EleutherAI/llemma_7b,0.01,1.8881051540374756,11.02577018737793,11.569954872131348,9.650038719177246,12.788570404052734,11.758131980895996,9.877432823181152,10.481515884399414,11.251947402954102,11.191924095153809,11.158823013305664,9.50117015838623,11.658199310302734,10.220966339111328,11.156086921691895,11.29964828491211,11.514382362365723,11.962372779846191,10.005611419677734,10.904683113098145,10.441763877868652,10.319205284118652,11.140127182006836,9.660285949707031,10.85449504852295,10.57632827758789,10.08746337890625,11.422477722167969,11.425626754760742,10.719733238220215,11.198336601257324,9.705710411071777,11.163413047790527,10.196398735046387,12.473783493041992,11.222222328186035,10.624934196472168,11.31132984161377,12.355788230895996,12.014126777648926,10.066976547241211,9.984125137329102,11.545228958129883,10.415767669677734,11.360154151916504,10.375480651855469,11.507826805114746,11.202043533325195,10.294898986816406,9.63949203491211,10.868791580200195,11.535318374633789,10.94536018371582,10.92975902557373,11.955968856811523,10.462796211242676,10.789320945739746,9.42170524597168,11.375829696655273,9.957808494567871,10.590437889099121,11.810711860656738,10.596860885620117,10.35020637512207,10.983830451965332,11.902685165405273,12.088788986206055,10.547697067260742,11.154755592346191,10.342395782470703,9.408880233764648,10.353204727172852,10.178812980651855,11.717917442321777,10.343524932861328,9.107279777526855,11.441140174865723,12.437472343444824,9.229593276977539,9.995097160339355,13.198864936828613,10.668238639831543,9.889089584350586,10.291954040527344,9.115019798278809,11.644171714782715,9.289207458496094,10.126248359680176,11.402484893798828,10.980873107910156,9.706541061401367,10.721711158752441,10.652327537536621,9.227180480957031,11.187313079833984,11.266683578491211,10.421162605285645,10.27970027923584,11.191493034362793,11.59189224243164,10.865525245666504 | |
codellama/CodeLlama-7b-hf vs lmsys/vicuna-7b-v1.1,0.29,10.684311866760254,9.88802433013916,11.366555213928223,10.514373779296875,11.024654388427734,10.283346176147461,11.294032096862793,11.239091873168945,11.904309272766113,11.442981719970703,9.724740028381348,10.486740112304688,10.531262397766113,10.704021453857422,11.958599090576172,11.913610458374023,10.451040267944336,11.539121627807617,11.079904556274414,10.380290985107422,10.464911460876465,10.824151039123535,10.613722801208496,10.818387031555176,10.909002304077148,11.383553504943848,11.057758331298828,10.797572135925293,11.538613319396973,10.077716827392578,11.08383846282959,10.961997985839844,10.51244068145752,10.801470756530762,11.105854034423828,10.700427055358887,10.818270683288574,10.551637649536133,10.831416130065918,11.392118453979492,11.99124813079834,11.624017715454102,11.47986125946045,11.073309898376465,10.576276779174805,11.65388298034668,10.442636489868164,10.892948150634766,10.495064735412598,11.371464729309082,12.704389572143555,10.441764831542969,11.860435485839844,10.724827766418457,10.910114288330078,9.940642356872559,11.6974515914917,10.893054008483887,10.342144012451172,10.850858688354492,11.492060661315918,10.491785049438477,12.155282974243164,11.26374340057373,10.091155052185059,10.971919059753418,10.889839172363281,9.859522819519043,11.616873741149902,11.444681167602539,10.922995567321777,11.157459259033203,10.100933074951172,10.804597854614258,10.976712226867676,11.442182540893555,11.178875923156738,11.096498489379883,11.058975219726562,11.280542373657227,11.896692276000977,11.171963691711426,10.79887580871582,11.429341316223145,11.694151878356934,12.15854549407959,11.003829002380371,11.078950881958008,11.429558753967285,11.451701164245605,11.222574234008789,11.16943073272705,10.59616756439209,9.994879722595215,11.877211570739746,12.13260555267334,10.476194381713867,9.788568496704102,9.951860427856445,12.063776016235352,10.534578323364258 | |
codellama/CodeLlama-7b-hf vs microsoft/Orca-2-7b,0.01,2.320063829421997,9.977145195007324,11.017745018005371,11.732516288757324,10.55106258392334,10.886062622070312,11.940051078796387,10.490015029907227,11.409379959106445,9.593038558959961,9.326103210449219,11.52270221710205,10.925615310668945,11.000499725341797,11.391313552856445,10.632598876953125,10.968502044677734,9.419816970825195,11.293421745300293,10.746554374694824,10.45264720916748,9.733240127563477,9.230606079101562,11.2017240524292,11.673495292663574,9.883476257324219,14.689058303833008,10.65975284576416,11.826485633850098,9.935537338256836,10.098958969116211,11.369180679321289,10.066923141479492,10.007286071777344,10.073885917663574,10.076384544372559,10.496053695678711,9.437036514282227,10.592652320861816,9.364110946655273,10.822487831115723,10.531739234924316,10.315073013305664,9.872851371765137,10.577094078063965,10.572772026062012,10.358898162841797,10.806140899658203,16.713293075561523,10.911810874938965,9.414806365966797,11.51042366027832,10.734992027282715,12.246551513671875,11.260639190673828,10.792762756347656,10.72775936126709,10.830528259277344,11.213608741760254,12.049139976501465,9.823256492614746,9.768250465393066,10.623906135559082,11.0609712600708,10.343968391418457,8.824793815612793,10.778313636779785,10.15536880493164,10.200857162475586,11.061370849609375,10.520877838134766,8.951927185058594,10.345824241638184,11.1963472366333,10.408754348754883,9.833317756652832,9.915206909179688,11.101364135742188,11.677738189697266,11.327123641967773,10.630494117736816,9.27521800994873,10.425193786621094,10.057621955871582,10.960726737976074,11.673463821411133,10.601761817932129,10.41421127319336,10.354918479919434,11.0147705078125,11.991089820861816,9.368815422058105,8.995253562927246,11.107773780822754,10.934799194335938,10.410724639892578,10.493441581726074,10.318268775939941,9.758223533630371,9.74752426147461,10.921327590942383 | |
codellama/CodeLlama-7b-hf vs LLM360/Amber,0.01,9.603846549987793,10.922776222229004,10.763627052307129,10.674799919128418,10.944330215454102,10.779414176940918,10.923018455505371,9.871673583984375,10.166775703430176,10.810501098632812,10.085100173950195,10.842877388000488,10.339820861816406,10.418185234069824,9.797157287597656,11.204648971557617,11.264816284179688,11.797182083129883,10.71696662902832,10.645628929138184,11.203916549682617,10.131481170654297,10.013680458068848,10.839400291442871,11.125710487365723,10.923445701599121,11.895150184631348,10.997817993164062,11.030921936035156,10.626928329467773,10.994245529174805,10.47247314453125,10.476496696472168,10.737053871154785,10.940540313720703,10.545219421386719,10.65115737915039,10.538052558898926,10.73913860321045,10.608595848083496,11.778488159179688,10.629700660705566,11.484350204467773,10.728248596191406,11.018856048583984,10.530044555664062,10.73550796508789,11.228963851928711,10.074200630187988,11.778253555297852,10.75644588470459,10.916414260864258,10.685816764831543,11.022653579711914,11.845925331115723,11.053071975708008,10.963454246520996,11.932684898376465,11.024358749389648,10.258270263671875,11.10335636138916,11.186234474182129,11.418861389160156,10.6289644241333,11.077651023864746,10.978907585144043,11.048869132995605,11.65108585357666,10.4456787109375,10.570917129516602,10.814337730407715,10.753689765930176,11.621301651000977,10.818181037902832,11.07013988494873,11.709806442260742,10.042532920837402,11.51170825958252,10.287849426269531,11.025697708129883,10.106474876403809,10.335064888000488,10.63949966430664,11.414520263671875,9.983678817749023,10.194723129272461,10.093085289001465,10.348984718322754,10.452190399169922,10.118818283081055,11.143959999084473,10.194239616394043,10.520627975463867,9.854783058166504,11.553059577941895,10.919979095458984,10.652848243713379,11.446795463562012,11.503812789916992,11.831265449523926,10.060680389404297 | |
openlm-research/open_llama_7b vs huggyllama/llama-7b,0.12,10.616796493530273,10.797248840332031,10.766523361206055,10.866601943969727,11.188263893127441,10.954423904418945,11.125080108642578,11.481340408325195,10.507387161254883,10.983829498291016,11.449170112609863,11.820627212524414,10.870183944702148,11.375383377075195,11.897957801818848,10.795915603637695,11.425924301147461,10.779943466186523,10.608522415161133,11.291844367980957,10.9775972366333,11.396665573120117,11.077995300292969,10.915323257446289,10.149968147277832,10.838491439819336,11.376053810119629,12.027775764465332,11.226425170898438,11.149484634399414,11.914155006408691,11.06672191619873,12.116933822631836,11.715675354003906,11.286850929260254,10.582368850708008,10.894996643066406,10.559850692749023,11.138714790344238,11.34919548034668,11.406577110290527,11.709674835205078,10.768847465515137,11.264957427978516,11.060179710388184,10.48735523223877,11.610673904418945,10.382858276367188,11.576861381530762,11.695962905883789,11.838735580444336,11.8810396194458,11.433664321899414,10.812098503112793,11.260259628295898,10.915751457214355,10.764631271362305,11.118894577026367,11.433198928833008,11.013741493225098,11.012408256530762,11.988961219787598,10.893074989318848,11.14090633392334,11.564040184020996,11.033992767333984,11.447514533996582,10.347433090209961,11.568410873413086,11.325068473815918,11.319805145263672,11.36147689819336,10.636981010437012,10.374312400817871,11.18582820892334,11.290904998779297,11.412924766540527,11.148937225341797,11.629731178283691,10.574426651000977,11.250195503234863,11.57218074798584,10.607427597045898,11.080497741699219,11.073441505432129,11.367008209228516,11.016765594482422,11.250604629516602,10.794018745422363,11.461509704589844,12.037816047668457,10.851967811584473,11.706341743469238,10.905144691467285,12.039907455444336,10.87762451171875,11.777667999267578,11.1010160446167,11.086259841918945,10.743986129760742,11.00861930847168 | |
openlm-research/open_llama_7b vs lmsys/vicuna-7b-v1.5,0.68,11.10280704498291,11.494667053222656,10.498247146606445,11.133041381835938,11.101852416992188,11.380467414855957,10.272819519042969,10.971842765808105,10.812454223632812,10.571216583251953,11.184111595153809,10.600080490112305,10.708064079284668,11.08830738067627,11.812434196472168,11.79877758026123,10.77804946899414,10.4550142288208,10.40676498413086,10.906953811645508,10.607868194580078,11.703605651855469,11.5511474609375,9.885051727294922,12.578688621520996,10.805289268493652,10.065228462219238,10.415675163269043,9.463606834411621,10.35334587097168,10.453519821166992,11.14851188659668,11.603013038635254,11.39593505859375,10.219343185424805,10.553191184997559,11.516910552978516,11.12913990020752,10.367042541503906,11.04174518585205,10.621479988098145,10.78134536743164,10.831048011779785,10.864336967468262,10.25527572631836,10.48643970489502,10.815017700195312,10.441289901733398,10.695341110229492,12.055279731750488,11.352632522583008,11.06913948059082,11.00218677520752,10.366230010986328,10.634378433227539,11.0018949508667,11.126782417297363,9.731849670410156,10.59158706665039,11.318184852600098,10.395101547241211,11.940820693969727,10.694389343261719,11.356606483459473,10.950284957885742,11.315900802612305,10.90494155883789,11.158031463623047,11.424098014831543,10.433701515197754,11.031879425048828,10.566242218017578,10.624926567077637,11.704424858093262,9.559603691101074,10.54096508026123,10.466423034667969,10.139713287353516,10.190888404846191,10.859702110290527,11.18995475769043,11.700125694274902,10.920588493347168,10.214404106140137,11.712258338928223,11.302619934082031,11.045059204101562,21.055919647216797,10.661722183227539,10.79277515411377,11.078306198120117,10.720754623413086,11.443395614624023,10.91203498840332,11.552059173583984,9.76099967956543,10.945575714111328,10.874262809753418,11.279154777526855,9.812013626098633,11.022870063781738 | |
openlm-research/open_llama_7b vs EleutherAI/llemma_7b,0.74,11.676680564880371,10.700831413269043,10.920976638793945,11.028438568115234,12.167314529418945,11.756142616271973,11.685977935791016,10.584118843078613,11.082442283630371,11.074419975280762,11.608972549438477,12.379051208496094,10.71125316619873,10.557001113891602,11.452447891235352,11.275219917297363,11.134586334228516,11.230876922607422,12.451685905456543,10.93018913269043,11.579869270324707,11.084054946899414,12.037456512451172,11.611255645751953,11.685752868652344,11.016987800598145,11.213319778442383,11.783517837524414,11.0402250289917,10.860733032226562,11.379624366760254,11.247736930847168,10.858418464660645,11.460200309753418,11.808459281921387,11.439626693725586,12.029986381530762,10.962839126586914,11.192753791809082,11.062772750854492,11.25781536102295,11.559438705444336,11.687849044799805,10.75302791595459,12.29854965209961,11.086894989013672,12.160594940185547,11.388923645019531,10.720033645629883,11.752241134643555,11.075825691223145,11.455404281616211,11.496362686157227,10.872966766357422,11.221406936645508,12.037018775939941,11.722471237182617,11.927973747253418,10.33534049987793,10.593981742858887,10.86403751373291,11.836677551269531,11.976253509521484,11.691933631896973,10.841435432434082,10.745210647583008,11.37106990814209,12.00289535522461,11.43269157409668,12.220749855041504,11.051970481872559,11.547199249267578,10.92629337310791,11.815045356750488,11.536211967468262,11.504477500915527,11.529980659484863,11.00515365600586,11.090625762939453,11.51578426361084,11.141458511352539,11.392504692077637,11.342756271362305,11.645566940307617,11.101119995117188,11.657102584838867,11.088163375854492,11.384390830993652,11.717936515808105,11.144939422607422,11.418688774108887,11.375986099243164,10.539258003234863,11.37796401977539,12.069337844848633,11.964790344238281,11.09656047821045,10.646003723144531,11.4827880859375,11.113225936889648,11.435702323913574 | |
openlm-research/open_llama_7b vs lmsys/vicuna-7b-v1.1,0.12,10.621867179870605,10.42145824432373,11.195003509521484,11.484685897827148,10.794024467468262,11.019747734069824,11.490104675292969,11.03162670135498,10.776253700256348,11.319022178649902,11.029229164123535,10.46848201751709,11.557026863098145,11.630350112915039,10.21575927734375,10.254093170166016,11.132651329040527,10.749253273010254,11.896352767944336,11.851204872131348,11.208588600158691,11.714153289794922,10.779157638549805,11.057229995727539,11.114214897155762,10.616106033325195,11.634489059448242,11.256133079528809,10.997373580932617,12.223559379577637,11.170015335083008,11.057348251342773,11.103898048400879,11.418852806091309,11.480304718017578,11.226197242736816,11.045890808105469,11.04325008392334,10.816136360168457,10.4801025390625,11.184416770935059,11.797572135925293,11.668251037597656,10.831574440002441,11.632143020629883,11.781684875488281,10.77318000793457,10.571830749511719,11.864724159240723,11.191770553588867,11.520462989807129,10.72339153289795,11.068861961364746,11.573631286621094,10.899574279785156,11.674015998840332,10.131872177124023,11.504653930664062,11.00288200378418,10.962963104248047,11.146491050720215,11.226775169372559,10.69074821472168,11.214972496032715,10.712320327758789,10.820131301879883,11.37641429901123,11.639378547668457,11.570959091186523,11.473939895629883,10.993010520935059,11.848175048828125,11.129470825195312,11.361347198486328,11.544088363647461,11.482494354248047,11.397720336914062,11.10208797454834,10.954719543457031,11.292170524597168,10.730844497680664,11.10391616821289,10.9358491897583,11.644660949707031,11.556461334228516,10.230692863464355,10.93234634399414,11.048127174377441,11.356008529663086,11.290736198425293,10.155138969421387,11.609221458435059,11.059803009033203,10.940677642822266,10.850116729736328,11.106929779052734,10.82397747039795,11.759023666381836,10.195236206054688,11.401366233825684,11.2745943069458 | |
openlm-research/open_llama_7b vs microsoft/Orca-2-7b,0.85,11.351534843444824,10.584617614746094,10.774704933166504,10.837328910827637,11.801148414611816,11.330574035644531,11.290903091430664,10.862215042114258,11.384349822998047,10.745942115783691,11.390098571777344,11.086434364318848,11.015809059143066,9.748941421508789,10.519133567810059,11.45274829864502,10.552149772644043,11.926155090332031,10.675073623657227,11.253833770751953,10.66576862335205,9.94119644165039,19.171932220458984,10.226914405822754,12.355240821838379,11.018232345581055,10.890312194824219,10.387739181518555,11.143223762512207,12.016646385192871,10.465130805969238,10.726633071899414,11.077333450317383,10.403158187866211,11.529679298400879,10.922691345214844,10.887194633483887,11.203007698059082,10.525662422180176,10.012253761291504,9.828682899475098,10.859309196472168,10.792906761169434,10.633329391479492,10.120866775512695,10.508658409118652,12.094136238098145,10.804570198059082,10.728404998779297,10.672419548034668,12.785676002502441,9.50652027130127,11.129883766174316,11.35869312286377,11.06894302368164,9.344609260559082,10.127263069152832,10.435647010803223,10.6591215133667,11.319711685180664,10.950030326843262,10.885222434997559,10.239350318908691,10.612761497497559,11.15457534790039,10.812723159790039,10.070918083190918,10.321710586547852,11.3038969039917,11.07686996459961,10.517918586730957,9.845905303955078,10.563337326049805,10.056536674499512,11.044584274291992,11.334074974060059,10.442049026489258,10.59984302520752,10.307365417480469,11.638136863708496,10.443572044372559,10.811623573303223,10.837541580200195,10.584928512573242,10.582640647888184,10.057901382446289,10.074625968933105,11.10626220703125,11.195136070251465,10.514017105102539,10.031493186950684,9.643144607543945,11.793606758117676,10.653993606567383,10.19979190826416,11.364167213439941,10.87149715423584,11.284507751464844,9.864716529846191,10.578239440917969,10.145064353942871 | |