{"id":9249,"date":"2026-02-21T11:42:51","date_gmt":"2026-02-21T06:12:51","guid":{"rendered":"https:\/\/www.testleaf.com\/blog\/?post_type=web-story&#038;p=9249"},"modified":"2026-02-21T11:42:53","modified_gmt":"2026-02-21T06:12:53","slug":"how-qa-engineers-should-evaluate-generative-ai-models","status":"publish","type":"web-story","link":"https:\/\/www.testleaf.com\/blog\/web-stories\/how-qa-engineers-should-evaluate-generative-ai-models\/","title":{"rendered":"How QA engineers should evaluate generative AI models"},"content":{"rendered":"<p><html amp=\"\" lang=\"en\"><head><meta charSet=\"utf-8\"\/><meta name=\"viewport\" content=\"width=device-width,minimum-scale=1,initial-scale=1\"\/><script async=\"\" src=\"https:\/\/cdn.ampproject.org\/v0.js\"><\/script><script async=\"\" src=\"https:\/\/cdn.ampproject.org\/v0\/amp-story-1.0.js\" custom-element=\"amp-story\"><\/script><link href=\"https:\/\/fonts.googleapis.com\/css2?display=swap&amp;family=Roboto%3Awght%40700\" rel=\"stylesheet\"\/><link href=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models.jpg\" rel=\"preload\" as=\"image\"\/>\n<style amp-boilerplate=\"\">body{-webkit-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-moz-animation:-amp-start 8s steps(1,end) 0s 1 normal both;-ms-animation:-amp-start 8s steps(1,end) 0s 1 normal both;animation:-amp-start 8s steps(1,end) 0s 1 normal both}@-webkit-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-moz-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-ms-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@-o-keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}@keyframes -amp-start{from{visibility:hidden}to{visibility:visible}}<\/style>\n<p><noscript><\/p>\n<style amp-boilerplate=\"\">body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}<\/style>\n<p><\/noscript><\/p>\n<style amp-custom=\"\">\n              h1, h2, h3 { font-weight: normal; }<\/p>\n<p>              amp-story-page {\n                background-color: #131516;\n              }<\/p>\n<p>              amp-story-grid-layer {\n                overflow: visible;\n              }<\/p>\n<p>              @media (max-aspect-ratio: 9 \/ 16)  {\n                @media (min-aspect-ratio: 320 \/ 678) {\n                  amp-story-grid-layer.grid-layer {\n                    margin-top: calc((100% \/ 0.5625 - 100% \/ 0.6666666666666666) \/ 2);\n                  }\n                }\n              }<\/p>\n<p>              @media not all and (min-resolution:.001dpcm) {\n                @media {\n                  p.text-wrapper > span {\n                    font-size: calc(100% - 0.5px);\n                  }\n                }\n              }<\/p>\n<p>              .page-fullbleed-area,\n              .page-background-overlay-area {\n                position: absolute;\n                overflow: hidden;\n                width: 100%;\n                left: 0;\n                height: calc(1.1851851851851851 * 100%);\n                top: calc((1 - 1.1851851851851851) * 100% \/ 2);\n              }<\/p>\n<p>              .element-overlay-area {\n                position: absolute;\n                width: 100%;\n                height: 100%;\n                top: 0;\n                left: 0;\n              }<\/p>\n<p>              .page-safe-area {\n                overflow: visible;\n                position: absolute;\n                top: 0;\n                bottom: 0;\n                left: 0;\n                right: 0;\n                width: 100%;\n                height: calc(0.84375 * 100%);\n                margin: auto 0;\n              }<\/p>\n<p>              .mask {\n                position: absolute;\n                overflow: hidden;\n              }<\/p>\n<p>              .fill {\n                position: absolute;\n                top: 0;\n                left: 0;\n                right: 0;\n                bottom: 0;\n                margin: 0;\n              }<\/p>\n<p>              @media (prefers-reduced-motion: no-preference) {\n                .animation-wrapper {\n                  opacity: var(--initial-opacity);\n                  transform: var(--initial-transform);\n                }\n              }<\/p>\n<p>              amp-story-grid-layer.align-bottom {\n                align-content: end;\n                padding: 0;\n                \/*\n                  AMP CTA Layer will exactly occupy 74px regardless of any device.\n                  To space out captions 74px from the BOTTOM (AMP CTA Layer),\n                  74px from the TOP should also be spaced out and thus: 2 * 74px\n                  will be the desired max-height.\n                *\/\n                max-height: calc(100vh - (2 * 74px));\n              }<\/p>\n<p>              .captions-area {\n                padding: 0 32px 0;\n              }<\/p>\n<p>              amp-story-captions {\n                margin-bottom: 16px;\n                text-align: center;\n              }<\/p>\n<p>              amp-story-audio-sticker {\n                height: 100%;\n              }<\/p>\n<p>              .audio-sticker {\n                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen-Sans, Ubuntu, Cantarell, 'Helvetica Neue', sans-serif;\n              }\n              <\/style>\n<p><meta name=\"web-stories-replace-head-start\"\/><title>How QA engineers should evaluate generative AI models<\/title><link rel=\"canonical\" href=\"https:\/\/www.testleaf.com\/blog\/?post_type=web-story&amp;p=9249\"\/><meta name=\"web-stories-replace-head-end\"\/><\/head><body><amp-story standalone=\"\" publisher=\"Testleaf\" publisher-logo-src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2022\/05\/favicon.png\" title=\"How QA engineers should evaluate generative AI models\" poster-portrait-src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/cropped-How-QA-engineers-should-evaluate-generative-AI-models.jpg\"><amp-story-page id=\"27919517-af7b-477c-accb-1b64f53e4d3d\" auto-advance-after=\"7s\"><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\" style=\"background-color:#270842\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:0;top:-9.25926%;width:100%;height:118.51852%;opacity:1\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0\" class=\"mask\" id=\"el-29e6c51f-2720-47bf-b91f-28174c274e0e\">\n<div style=\"position:absolute;width:100%;height:100%;left:0%;top:0%\" data-leaf-element=\"true\"><amp-img layout=\"fill\" src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models.jpg\" alt=\"How QA engineers should evaluate generative AI models\" srcSet=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models.jpg 720w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-576x1024.jpg 576w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-169x300.jpg 169w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-150x267.jpg 150w\" sizes=\"(min-width: 1024px) 45vh, 100vw\" disable-inline-width=\"true\"><\/amp-img><\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\">\n<div class=\"page-safe-area\"><\/div>\n<\/div>\n<p><\/amp-story-grid-layer><amp-story-page-outlink layout=\"nodisplay\" cta-image=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2022\/05\/favicon-300x300.png\"><a href=\"https:\/\/www.testleaf.com\/blog\/best-generative-ai-models-in-2026-for-qa-engineers-top-7-compared-use-cases-strengths-limitations\/?utm_source=Web_Story&amp;utm_medium=Organic&amp;utm_campaign=Web_Story\" rel=\"\">Learn more<\/a><\/amp-story-page-outlink><\/amp-story-page><amp-story-page id=\"7291eaf8-ba7f-4085-a862-afceb2dc3d9a\" auto-advance-after=\"7s\"><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-36368bd3-2053-410a-b44b-49d833b074f5 [data-leaf-element=\\\"true\\\"]\",\"keyframes\":{\"transform\":[\"translate(0%, 0%) scale(1.5)\",\"translate(0%, 0%) scale(1)\"]},\"delay\":0,\"duration\":2000,\"easing\":\"cubic-bezier(.3,0,.55,1)\",\"fill\":\"forwards\"}]<\/script><\/amp-story-animation><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-85bd9922-2735-4056-b17f-9d17be56a654\",\"keyframes\":[{\"offset\":0,\"transform\":\"translate3d(0, -1731.33894%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.29,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.45,\"transform\":\"translate3d(0, -486.852509928%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.61,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.71,\"transform\":\"translate3d(0, -165.516002664%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.8,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.85,\"transform\":\"translate3d(0, -62.155067946%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.92,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.96,\"transform\":\"translate3d(0, -27.008887464%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":1,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"}],\"delay\":0,\"duration\":600,\"fill\":\"both\"}]<\/script><\/amp-story-animation><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\" style=\"background-color:#b7d4e2\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:0;top:-9.25926%;width:100%;height:118.51852%;opacity:1\">\n<div id=\"anim-36368bd3-2053-410a-b44b-49d833b074f5\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0\" class=\"mask\" id=\"el-b3a63116-7a6e-4db2-b09c-8043eb5fa9b8\">\n<div style=\"position:absolute;width:100%;height:100%;left:0%;top:0%\" data-leaf-element=\"true\"><amp-img layout=\"fill\" src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1.jpg\" alt=\"How QA engineers should evaluate generative AI models (1)\" srcSet=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1.jpg 720w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-576x1024.jpg 576w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-169x300.jpg 169w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-150x267.jpg 150w\" sizes=\"(min-width: 1024px) 45vh, 100vw\" disable-inline-width=\"true\"><\/amp-img><\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:4.12621%;top:93.68932%;width:101.21359%;height:6.31068%;opacity:1\">\n<div id=\"anim-85bd9922-2735-4056-b17f-9d17be56a654\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0;--initial-opacity:1;--initial-transform:translate3d(0, -1731.33894%, 0)\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0;border-radius:0.4796163069544364% 0.4796163069544364% 0.4796163069544364% 0.4796163069544364% \/ 5.128205128205128% 5.128205128205128% 5.128205128205128% 5.128205128205128%\" id=\"el-cfa49128-ba1a-401e-8495-98a7e19203d7\">\n<h2 class=\"fill text-wrapper\" style=\"white-space:pre-line;overflow-wrap:break-word;word-break:break-word;margin:-0.11128597122302177% 0;font-family:&quot;Roboto&quot;,&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,sans-serif;font-size:0.533981em;line-height:1.2;text-align:left;padding:0;color:#000000\"><span><span style=\"font-weight: 700\">Code Reasoning Accuracy<\/span><\/span><\/h2>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><\/amp-story-page><amp-story-page id=\"66cbdd88-f271-47da-b029-44a3d3df96fc\" auto-advance-after=\"7s\"><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-0b27ef7a-585a-4a99-b480-61a1ec41f0e3 [data-leaf-element=\\\"true\\\"]\",\"keyframes\":{\"transform\":[\"translate(0%, 0%) scale(1.5)\",\"translate(0%, 0%) scale(1)\"]},\"delay\":0,\"duration\":2000,\"easing\":\"cubic-bezier(.3,0,.55,1)\",\"fill\":\"forwards\"}]<\/script><\/amp-story-animation><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-3e31b4ae-bb26-44c2-a7c1-1ceb3f123e8c\",\"keyframes\":[{\"offset\":0,\"transform\":\"translate3d(0, -1731.33894%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.29,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.45,\"transform\":\"translate3d(0, -486.852509928%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.61,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.71,\"transform\":\"translate3d(0, -165.516002664%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.8,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.85,\"transform\":\"translate3d(0, -62.155067946%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.92,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.96,\"transform\":\"translate3d(0, -27.008887464%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":1,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"}],\"delay\":0,\"duration\":600,\"fill\":\"both\"}]<\/script><\/amp-story-animation><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\" style=\"background-color:#dde0e5\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:0;top:-9.25926%;width:100%;height:118.51852%;opacity:1\">\n<div id=\"anim-0b27ef7a-585a-4a99-b480-61a1ec41f0e3\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0\" class=\"mask\" id=\"el-15bfd279-f1a4-4c29-8fdc-5cbb4205548b\">\n<div style=\"position:absolute;width:100%;height:100%;left:0%;top:0%\" data-leaf-element=\"true\"><amp-img layout=\"fill\" src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2.jpg\" alt=\"How QA engineers should evaluate generative AI models (2)\" srcSet=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2.jpg 720w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-576x1024.jpg 576w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-169x300.jpg 169w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-150x267.jpg 150w\" sizes=\"(min-width: 1024px) 45vh, 100vw\" disable-inline-width=\"true\"><\/amp-img><\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:13.83495%;top:93.68932%;width:101.21359%;height:6.31068%;opacity:1\">\n<div id=\"anim-3e31b4ae-bb26-44c2-a7c1-1ceb3f123e8c\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0;--initial-opacity:1;--initial-transform:translate3d(0, -1731.33894%, 0)\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0;border-radius:0.4796163069544364% 0.4796163069544364% 0.4796163069544364% 0.4796163069544364% \/ 5.128205128205128% 5.128205128205128% 5.128205128205128% 5.128205128205128%\" id=\"el-99ce88bf-d888-452a-baac-12cc9ab0094b\">\n<h2 class=\"fill text-wrapper\" style=\"white-space:pre-line;overflow-wrap:break-word;word-break:break-word;margin:-0.11128597122302177% 0;font-family:&quot;Roboto&quot;,&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,sans-serif;font-size:0.533981em;line-height:1.2;text-align:left;padding:0;color:#000000\"><span><span style=\"font-weight: 700\">Hallucination Risk<\/span><\/span><\/h2>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><\/amp-story-page><amp-story-page id=\"bca17345-6f6f-423f-8ea6-c497f50b2b5b\" auto-advance-after=\"7s\"><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-2726cdc7-1aa9-4798-a54a-5dcb90802c5e [data-leaf-element=\\\"true\\\"]\",\"keyframes\":{\"transform\":[\"translate(0%, 0%) scale(1.5)\",\"translate(0%, 0%) scale(1)\"]},\"delay\":0,\"duration\":2000,\"easing\":\"cubic-bezier(.3,0,.55,1)\",\"fill\":\"forwards\"}]<\/script><\/amp-story-animation><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-95e719f5-24d9-4180-a46b-2b39b2f915c0\",\"keyframes\":[{\"offset\":0,\"transform\":\"translate3d(0, -1731.33894%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.29,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.45,\"transform\":\"translate3d(0, -486.852509928%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.61,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.71,\"transform\":\"translate3d(0, -165.516002664%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.8,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.85,\"transform\":\"translate3d(0, -62.155067946%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.92,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.96,\"transform\":\"translate3d(0, -27.008887464%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":1,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"}],\"delay\":0,\"duration\":600,\"fill\":\"both\"}]<\/script><\/amp-story-animation><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\" style=\"background-color:#573b67\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:0;top:-9.25926%;width:100%;height:118.51852%;opacity:1\">\n<div id=\"anim-2726cdc7-1aa9-4798-a54a-5dcb90802c5e\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0\" class=\"mask\" id=\"el-b3e5587b-e632-4895-91f0-d353d1682657\">\n<div style=\"position:absolute;width:100%;height:100%;left:0%;top:0%\" data-leaf-element=\"true\"><amp-img layout=\"fill\" src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3.jpg\" alt=\"How QA engineers should evaluate generative AI models (3)\" srcSet=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3.jpg 720w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-576x1024.jpg 576w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-169x300.jpg 169w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-150x267.jpg 150w\" sizes=\"(min-width: 1024px) 45vh, 100vw\" disable-inline-width=\"true\"><\/amp-img><\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:13.83495%;top:93.68932%;width:101.21359%;height:6.31068%;opacity:1\">\n<div id=\"anim-95e719f5-24d9-4180-a46b-2b39b2f915c0\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0;--initial-opacity:1;--initial-transform:translate3d(0, -1731.33894%, 0)\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0;border-radius:0.4796163069544364% 0.4796163069544364% 0.4796163069544364% 0.4796163069544364% \/ 5.128205128205128% 5.128205128205128% 5.128205128205128% 5.128205128205128%\" id=\"el-718c96d8-0ffe-4902-8c7a-ad1933102490\">\n<h2 class=\"fill text-wrapper\" style=\"white-space:pre-line;overflow-wrap:break-word;word-break:break-word;margin:-0.11128597122302177% 0;font-family:&quot;Roboto&quot;,&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,sans-serif;font-size:0.533981em;line-height:1.2;text-align:left;padding:0;color:#000000\"><span><span style=\"font-weight: 700\">Context Window Size<\/span><\/span><\/h2>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><\/amp-story-page><amp-story-page id=\"ee96912d-4f76-4bde-ba00-b76aab2edbe7\" auto-advance-after=\"7s\"><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-869e7fe0-1baf-42e3-be2e-c1ae89297fd3 [data-leaf-element=\\\"true\\\"]\",\"keyframes\":{\"transform\":[\"translate(0%, 0%) scale(1.5)\",\"translate(0%, 0%) scale(1)\"]},\"delay\":0,\"duration\":2000,\"easing\":\"cubic-bezier(.3,0,.55,1)\",\"fill\":\"forwards\"}]<\/script><\/amp-story-animation><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-61f10779-0a04-45fc-8e0f-f4aa06b571aa\",\"keyframes\":[{\"offset\":0,\"transform\":\"translate3d(0, -1731.33894%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.29,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.45,\"transform\":\"translate3d(0, -486.852509928%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.61,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.71,\"transform\":\"translate3d(0, -165.516002664%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.8,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.85,\"transform\":\"translate3d(0, -62.155067946%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.92,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.96,\"transform\":\"translate3d(0, -27.008887464%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":1,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"}],\"delay\":0,\"duration\":600,\"fill\":\"both\"}]<\/script><\/amp-story-animation><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\" style=\"background-color:#e1d7e8\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:0;top:-9.25926%;width:100%;height:118.51852%;opacity:1\">\n<div id=\"anim-869e7fe0-1baf-42e3-be2e-c1ae89297fd3\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0\" class=\"mask\" id=\"el-f735ba28-d248-47e0-b20e-bbb61c177361\">\n<div style=\"position:absolute;width:100%;height:100%;left:0%;top:0%\" data-leaf-element=\"true\"><amp-img layout=\"fill\" src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4.jpg\" alt=\"How QA engineers should evaluate generative AI models (4)\" srcSet=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4.jpg 720w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-576x1024.jpg 576w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-169x300.jpg 169w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-150x267.jpg 150w\" sizes=\"(min-width: 1024px) 45vh, 100vw\" disable-inline-width=\"true\"><\/amp-img><\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:11.40777%;top:93.68932%;width:101.21359%;height:6.31068%;opacity:1\">\n<div id=\"anim-61f10779-0a04-45fc-8e0f-f4aa06b571aa\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0;--initial-opacity:1;--initial-transform:translate3d(0, -1731.33894%, 0)\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0;border-radius:0.4796163069544364% 0.4796163069544364% 0.4796163069544364% 0.4796163069544364% \/ 5.128205128205128% 5.128205128205128% 5.128205128205128% 5.128205128205128%\" id=\"el-ff1ca48e-91bf-4d21-81b8-e0e8d5f2da3c\">\n<h2 class=\"fill text-wrapper\" style=\"white-space:pre-line;overflow-wrap:break-word;word-break:break-word;margin:-0.11128597122302177% 0;font-family:&quot;Roboto&quot;,&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,sans-serif;font-size:0.533981em;line-height:1.2;text-align:left;padding:0;color:#000000\"><span><span style=\"font-weight: 700\">Multimodal Capability<\/span><\/span><\/h2>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><\/amp-story-page><amp-story-page id=\"4589970d-7073-4c30-b698-40d00851071d\" auto-advance-after=\"7s\"><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-502835fd-d574-4569-b9cb-2468cd3678e8 [data-leaf-element=\\\"true\\\"]\",\"keyframes\":{\"transform\":[\"translate(0%, 0%) scale(1.5)\",\"translate(0%, 0%) scale(1)\"]},\"delay\":0,\"duration\":2000,\"easing\":\"cubic-bezier(.3,0,.55,1)\",\"fill\":\"forwards\"}]<\/script><\/amp-story-animation><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-562a12fb-3338-43ab-9197-e8670e7352aa\",\"keyframes\":[{\"offset\":0,\"transform\":\"translate3d(0, -1731.33894%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.29,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.45,\"transform\":\"translate3d(0, -486.852509928%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.61,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.71,\"transform\":\"translate3d(0, -165.516002664%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.8,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.85,\"transform\":\"translate3d(0, -62.155067946%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.92,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.96,\"transform\":\"translate3d(0, -27.008887464%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":1,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"}],\"delay\":0,\"duration\":600,\"fill\":\"both\"}]<\/script><\/amp-story-animation><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\" style=\"background-color:#eae8eb\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:0;top:-9.25926%;width:100%;height:118.51852%;opacity:1\">\n<div id=\"anim-502835fd-d574-4569-b9cb-2468cd3678e8\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0\" class=\"mask\" id=\"el-189b6187-caab-4d12-a6a5-5fc6c5cb639a\">\n<div style=\"position:absolute;width:100%;height:100%;left:0%;top:0%\" data-leaf-element=\"true\"><amp-img layout=\"fill\" src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5.jpg\" alt=\"How QA engineers should evaluate generative AI models (5)\" srcSet=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5.jpg 720w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-576x1024.jpg 576w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-169x300.jpg 169w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-150x267.jpg 150w\" sizes=\"(min-width: 1024px) 45vh, 100vw\" disable-inline-width=\"true\"><\/amp-img><\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:8.98058%;top:93.68932%;width:101.21359%;height:6.31068%;opacity:1\">\n<div id=\"anim-562a12fb-3338-43ab-9197-e8670e7352aa\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0;--initial-opacity:1;--initial-transform:translate3d(0, -1731.33894%, 0)\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0;border-radius:0.4796163069544364% 0.4796163069544364% 0.4796163069544364% 0.4796163069544364% \/ 5.128205128205128% 5.128205128205128% 5.128205128205128% 5.128205128205128%\" id=\"el-c3c9266f-81e9-45bc-817a-c40d43435bdd\">\n<h2 class=\"fill text-wrapper\" style=\"white-space:pre-line;overflow-wrap:break-word;word-break:break-word;margin:-0.11128597122302177% 0;font-family:&quot;Roboto&quot;,&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,sans-serif;font-size:0.533981em;line-height:1.2;text-align:left;padding:0;color:#000000\"><span><span style=\"font-weight: 700\">Enterprise Deployment<\/span><\/span><\/h2>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><\/amp-story-page><amp-story-page id=\"40ab2452-9f27-4f8c-8b3b-cd52b5596996\" auto-advance-after=\"7s\"><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-83410df6-67a1-41c7-87d1-00d0e31d461e [data-leaf-element=\\\"true\\\"]\",\"keyframes\":{\"transform\":[\"translate(0%, 0%) scale(1.5)\",\"translate(0%, 0%) scale(1)\"]},\"delay\":0,\"duration\":2000,\"easing\":\"cubic-bezier(.3,0,.55,1)\",\"fill\":\"forwards\"}]<\/script><\/amp-story-animation><amp-story-animation layout=\"nodisplay\" trigger=\"visibility\"><script type=\"application\/json\">[{\"selector\":\"#anim-a35a8e62-802c-4271-bd7c-44b42ecdd917\",\"keyframes\":[{\"offset\":0,\"transform\":\"translate3d(0, -1731.33894%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.29,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.45,\"transform\":\"translate3d(0, -486.852509928%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.61,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.71,\"transform\":\"translate3d(0, -165.516002664%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.8,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.85,\"transform\":\"translate3d(0, -62.155067946%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":0.92,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"},{\"offset\":0.96,\"transform\":\"translate3d(0, -27.008887464%, 0)\",\"easing\":\"cubic-bezier(.5, 0, 1, 1)\"},{\"offset\":1,\"transform\":\"translate3d(0, 0%, 0)\",\"easing\":\"cubic-bezier(0, 0, .5, 1)\"}],\"delay\":0,\"duration\":600,\"fill\":\"both\"}]<\/script><\/amp-story-animation><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\" style=\"background-color:#082960\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:0;top:-9.25926%;width:100%;height:118.51852%;opacity:1\">\n<div id=\"anim-83410df6-67a1-41c7-87d1-00d0e31d461e\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0\" class=\"mask\" id=\"el-ad43bf0b-fde4-4a24-8814-44c6ea598441\">\n<div style=\"position:absolute;width:100%;height:100%;left:0%;top:0%\" data-leaf-element=\"true\"><amp-img layout=\"fill\" src=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6.jpg\" alt=\"How QA engineers should evaluate generative AI models (6)\" srcSet=\"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6.jpg 720w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-576x1024.jpg 576w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-169x300.jpg 169w,https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-150x267.jpg 150w\" sizes=\"(min-width: 1024px) 45vh, 100vw\" disable-inline-width=\"true\"><\/amp-img><\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><amp-story-grid-layer template=\"vertical\" aspect-ratio=\"412:618\" class=\"grid-layer\"><\/p>\n<div class=\"page-fullbleed-area\">\n<div class=\"page-safe-area\">\n<div style=\"position:absolute;pointer-events:none;left:13.83495%;top:93.68932%;width:101.21359%;height:6.31068%;opacity:1\">\n<div id=\"anim-a35a8e62-802c-4271-bd7c-44b42ecdd917\" class=\"animation-wrapper\" style=\"width:100%;height:100%;display:block;position:absolute;top:0;left:0;--initial-opacity:1;--initial-transform:translate3d(0, -1731.33894%, 0)\">\n<div style=\"pointer-events:initial;width:100%;height:100%;display:block;position:absolute;top:0;left:0;z-index:0;border-radius:0.4796163069544364% 0.4796163069544364% 0.4796163069544364% 0.4796163069544364% \/ 5.128205128205128% 5.128205128205128% 5.128205128205128% 5.128205128205128%\" id=\"el-8f77c734-e0f9-4451-9d5f-a0de23c78173\">\n<h2 class=\"fill text-wrapper\" style=\"white-space:pre-line;overflow-wrap:break-word;word-break:break-word;margin:-0.11128597122302177% 0;font-family:&quot;Roboto&quot;,&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,sans-serif;font-size:0.533981em;line-height:1.2;text-align:left;padding:0;color:#000000\"><span><span style=\"font-weight: 700\">Agentic Capability<\/span><\/span><\/h2>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<p><\/amp-story-grid-layer><\/amp-story-page><\/amp-story><\/body><\/html><\/p>\n","protected":false},"excerpt":{"rendered":"<p>Learn how QA engineers can evaluate generative AI models using accuracy, bias detection, reliability, test coverage, and real-world performance to ensure safe, scalable AI-driven testing.<\/p>\n","protected":false},"author":1,"featured_media":9250,"template":"","meta":{"_acf_changed":false,"om_disable_all_campaigns":false,"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"site-sidebar-layout":"default","site-content-layout":"default","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","web_stories_publisher_logo":161,"web_stories_poster":[],"web_stories_products":[],"footnotes":""},"web_story_category":[],"web_story_tag":[],"class_list":["post-9249","web-story","type-web-story","status-publish","has-post-thumbnail","hentry"],"aioseo_notices":[],"amp_enabled":false,"story_data":{"version":47,"pages":[{"elements":[{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":48,"y":0,"width":330,"height":586,"mask":{"type":"rectangle"},"isBackground":true,"type":"image","id":"29e6c51f-2720-47bf-b91f-28174c274e0e","scale":100,"focalX":50,"focalY":50,"resource":{"type":"image","mimeType":"image\/jpeg","width":720,"height":1280,"sizes":{"medium":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-169x300.jpg","width":169,"height":300,"filesize":12108,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-169x300.jpg"},"large":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-576x1024.jpg","width":576,"height":1024,"filesize":68541,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-576x1024.jpg"},"thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-150x150.jpg","width":150,"height":150,"filesize":7029,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-150x150.jpg"},"rpg_gallery_admin_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-150x150.jpg","width":150,"height":150,"filesize":7029,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-150x150.jpg"},"rpg_gallery_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-150x150.jpg","width":150,"height":150,"filesize":7029,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-150x150.jpg"},"web-stories-poster-portrait":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-640x853.jpg","width":640,"height":853,"filesize":67232,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-640x853.jpg"},"web-stories-publisher-logo":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-96x96.jpg","width":96,"height":96,"filesize":3556,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-96x96.jpg"},"web-stories-thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-150x267.jpg","width":150,"height":267,"filesize":9906,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-150x267.jpg"},"full":{"file":"How-QA-engineers-should-evaluate-generative-AI-models.jpg","width":720,"height":1280,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models.jpg"}},"isPlaceholder":false,"isExternal":false,"needsProxy":false,"baseColor":"#270842","blurHash":"U8AJjX9D0J%ONW-@D$IS4nRh?IkD4.D$?c%3","creationDate":"2026-02-21T05:51:10","src":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models.jpg","id":9246,"alt":"How QA engineers should evaluate generative AI models","local":false}}],"backgroundColor":{"color":{"r":255,"g":255,"b":255}},"id":"27919517-af7b-477c-accb-1b64f53e4d3d","defaultBackgroundElement":{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":1,"y":1,"width":1,"height":1,"mask":{"type":"rectangle"},"isBackground":true,"isDefaultBackground":true,"type":"shape","id":"87b9a8bf-3ebc-4dac-a834-68387635d637"},"animations":[],"pageAttachment":{"url":"https:\/\/www.testleaf.com\/blog\/best-generative-ai-models-in-2026-for-qa-engineers-top-7-compared-use-cases-strengths-limitations\/?utm_source=Web_Story&utm_medium=Organic&utm_campaign=Web_Story","icon":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2022\/05\/favicon-300x300.png","needsProxy":false}},{"elements":[{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":48,"y":0,"width":330,"height":586,"mask":{"type":"rectangle"},"isBackground":true,"type":"image","id":"b3a63116-7a6e-4db2-b09c-8043eb5fa9b8","scale":100,"focalX":50,"focalY":50,"resource":{"type":"image","mimeType":"image\/jpeg","width":720,"height":1280,"sizes":{"medium":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1-169x300.jpg","width":169,"height":300,"filesize":9010,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-169x300.jpg"},"large":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1-576x1024.jpg","width":576,"height":1024,"filesize":41836,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-576x1024.jpg"},"thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1-150x150.jpg","width":150,"height":150,"filesize":5651,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-150x150.jpg"},"rpg_gallery_admin_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1-150x150.jpg","width":150,"height":150,"filesize":5651,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-150x150.jpg"},"rpg_gallery_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1-150x150.jpg","width":150,"height":150,"filesize":5651,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-150x150.jpg"},"web-stories-poster-portrait":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1-640x853.jpg","width":640,"height":853,"filesize":44832,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-640x853.jpg"},"web-stories-publisher-logo":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1-96x96.jpg","width":96,"height":96,"filesize":3347,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-96x96.jpg"},"web-stories-thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1-150x267.jpg","width":150,"height":267,"filesize":7662,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1-150x267.jpg"},"full":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-1.jpg","width":720,"height":1280,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1.jpg"}},"isPlaceholder":false,"isExternal":false,"needsProxy":false,"baseColor":"#b7d4e2","blurHash":"UaK_wkrdtmS#Ekt7jsad_LthW9ja9aRPbbf-","creationDate":"2026-02-21T05:51:04","src":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-1.jpg","id":9240,"alt":"How QA engineers should evaluate generative AI models (1)","local":false}},{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundTextMode":"NONE","font":{"family":"Roboto"},"fontSize":33,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"lineHeight":1.2,"textAlign":"left","padding":{"locked":true,"hasHiddenPadding":false,"horizontal":0,"vertical":0},"content":"<span style=\"font-weight: 700\">Code Reasoning Accuracy<\/span>","x":17,"y":579,"width":417,"borderRadius":{"locked":true,"topLeft":2,"topRight":2,"bottomRight":2,"bottomLeft":2},"type":"text","height":39,"id":"cfa49128-ba1a-401e-8495-98a7e19203d7"}],"backgroundColor":{"color":{"r":255,"g":255,"b":255}},"id":"7291eaf8-ba7f-4085-a862-afceb2dc3d9a","defaultBackgroundElement":{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":1,"y":1,"width":1,"height":1,"mask":{"type":"rectangle"},"isBackground":true,"isDefaultBackground":true,"type":"shape","id":"579fe7be-8186-4b89-aa2c-224545a3cebc"},"animations":[{"id":"39535dd9-7bc1-4159-8f11-5fd787a36209","type":"effect-background-zoom","zoomDirection":"scaleOut","duration":2000,"delay":0,"targets":["b3a63116-7a6e-4db2-b09c-8043eb5fa9b8"]},{"id":"ef06c3db-f009-4a5f-9509-23c77a20ed3f","type":"effect-drop","duration":600,"delay":0,"targets":["cfa49128-ba1a-401e-8495-98a7e19203d7"]}]},{"elements":[{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"scale":100,"focalX":50,"focalY":50,"resource":{"type":"image","mimeType":"image\/jpeg","width":720,"height":1280,"sizes":{"medium":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2-169x300.jpg","width":169,"height":300,"filesize":8478,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-169x300.jpg"},"large":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2-576x1024.jpg","width":576,"height":1024,"filesize":39499,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-576x1024.jpg"},"thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2-150x150.jpg","width":150,"height":150,"filesize":6416,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-150x150.jpg"},"rpg_gallery_admin_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2-150x150.jpg","width":150,"height":150,"filesize":6416,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-150x150.jpg"},"rpg_gallery_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2-150x150.jpg","width":150,"height":150,"filesize":6416,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-150x150.jpg"},"web-stories-poster-portrait":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2-640x853.jpg","width":640,"height":853,"filesize":42823,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-640x853.jpg"},"web-stories-publisher-logo":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2-96x96.jpg","width":96,"height":96,"filesize":3611,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-96x96.jpg"},"web-stories-thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2-150x267.jpg","width":150,"height":267,"filesize":7297,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2-150x267.jpg"},"full":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-2.jpg","width":720,"height":1280,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2.jpg"}},"isPlaceholder":false,"isExternal":false,"needsProxy":false,"baseColor":"#dde0e5","blurHash":"UXQ]+z-mspbF$~Mxt5t7~pN0WAV[ETyDM|RP","creationDate":"2026-02-21T05:51:05","src":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-2.jpg","id":9241,"alt":"How QA engineers should evaluate generative AI models (2)","local":false},"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":48,"y":0,"width":330,"height":586,"mask":{"type":"rectangle"},"isBackground":true,"id":"15bfd279-f1a4-4c29-8fdc-5cbb4205548b","type":"image"},{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundTextMode":"NONE","font":{"family":"Roboto"},"fontSize":33,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"lineHeight":1.2,"textAlign":"left","padding":{"locked":true,"hasHiddenPadding":false,"horizontal":0,"vertical":0},"content":"<span style=\"font-weight: 700\">Hallucination Risk<\/span>","x":57,"y":579,"width":417,"borderRadius":{"locked":true,"topLeft":2,"topRight":2,"bottomRight":2,"bottomLeft":2},"height":39,"id":"99ce88bf-d888-452a-baac-12cc9ab0094b","type":"text"}],"backgroundColor":{"color":{"r":255,"g":255,"b":255}},"animations":[{"id":"c19a40d0-f711-4bcd-b33b-06804677c22a","type":"effect-background-zoom","zoomDirection":"scaleOut","duration":2000,"delay":0,"targets":["15bfd279-f1a4-4c29-8fdc-5cbb4205548b"]},{"id":"ee69fe74-6402-4768-8d25-7ddca6a48747","type":"effect-drop","duration":600,"delay":0,"targets":["99ce88bf-d888-452a-baac-12cc9ab0094b"]}],"id":"66cbdd88-f271-47da-b029-44a3d3df96fc","defaultBackgroundElement":{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":1,"y":1,"width":1,"height":1,"mask":{"type":"rectangle"},"isBackground":true,"isDefaultBackground":true,"type":"shape","id":"579fe7be-8186-4b89-aa2c-224545a3cebc"}},{"elements":[{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"scale":100,"focalX":50,"focalY":50,"resource":{"type":"image","mimeType":"image\/jpeg","width":720,"height":1280,"sizes":{"medium":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3-169x300.jpg","width":169,"height":300,"filesize":9530,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-169x300.jpg"},"large":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3-576x1024.jpg","width":576,"height":1024,"filesize":39175,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-576x1024.jpg"},"thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3-150x150.jpg","width":150,"height":150,"filesize":5152,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-150x150.jpg"},"rpg_gallery_admin_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3-150x150.jpg","width":150,"height":150,"filesize":5152,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-150x150.jpg"},"rpg_gallery_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3-150x150.jpg","width":150,"height":150,"filesize":5152,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-150x150.jpg"},"web-stories-poster-portrait":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3-640x853.jpg","width":640,"height":853,"filesize":38770,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-640x853.jpg"},"web-stories-publisher-logo":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3-96x96.jpg","width":96,"height":96,"filesize":3108,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-96x96.jpg"},"web-stories-thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3-150x267.jpg","width":150,"height":267,"filesize":7903,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3-150x267.jpg"},"full":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-3.jpg","width":720,"height":1280,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3.jpg"}},"isPlaceholder":false,"isExternal":false,"needsProxy":false,"baseColor":"#573b67","blurHash":"UBP?ac00sk_008ogjENG}MyDWYMe4TV?Rkxb","creationDate":"2026-02-21T05:51:06","src":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-3.jpg","id":9242,"alt":"How QA engineers should evaluate generative AI models (3)","local":false},"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":48,"y":0,"width":330,"height":586,"mask":{"type":"rectangle"},"isBackground":true,"id":"b3e5587b-e632-4895-91f0-d353d1682657","type":"image"},{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundTextMode":"NONE","font":{"family":"Roboto"},"fontSize":33,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"lineHeight":1.2,"textAlign":"left","padding":{"locked":true,"hasHiddenPadding":false,"horizontal":0,"vertical":0},"content":"<span style=\"font-weight: 700\">Context Window Size<\/span>","x":57,"y":579,"width":417,"borderRadius":{"locked":true,"topLeft":2,"topRight":2,"bottomRight":2,"bottomLeft":2},"height":39,"id":"718c96d8-0ffe-4902-8c7a-ad1933102490","type":"text"}],"backgroundColor":{"color":{"r":255,"g":255,"b":255}},"animations":[{"id":"73231cd1-7da9-4c23-b412-2e62fcf75e64","type":"effect-background-zoom","zoomDirection":"scaleOut","duration":2000,"delay":0,"targets":["b3e5587b-e632-4895-91f0-d353d1682657"]},{"id":"a46f50f8-d8b2-4e22-9590-d2788d054516","type":"effect-drop","duration":600,"delay":0,"targets":["718c96d8-0ffe-4902-8c7a-ad1933102490"]}],"id":"bca17345-6f6f-423f-8ea6-c497f50b2b5b","defaultBackgroundElement":{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":1,"y":1,"width":1,"height":1,"mask":{"type":"rectangle"},"isBackground":true,"isDefaultBackground":true,"type":"shape","id":"579fe7be-8186-4b89-aa2c-224545a3cebc"}},{"elements":[{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"scale":100,"focalX":50,"focalY":50,"resource":{"type":"image","mimeType":"image\/jpeg","width":720,"height":1280,"sizes":{"medium":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4-169x300.jpg","width":169,"height":300,"filesize":9449,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-169x300.jpg"},"large":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4-576x1024.jpg","width":576,"height":1024,"filesize":46904,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-576x1024.jpg"},"thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4-150x150.jpg","width":150,"height":150,"filesize":6671,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-150x150.jpg"},"rpg_gallery_admin_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4-150x150.jpg","width":150,"height":150,"filesize":6671,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-150x150.jpg"},"rpg_gallery_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4-150x150.jpg","width":150,"height":150,"filesize":6671,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-150x150.jpg"},"web-stories-poster-portrait":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4-640x853.jpg","width":640,"height":853,"filesize":51326,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-640x853.jpg"},"web-stories-publisher-logo":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4-96x96.jpg","width":96,"height":96,"filesize":3584,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-96x96.jpg"},"web-stories-thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4-150x267.jpg","width":150,"height":267,"filesize":7901,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4-150x267.jpg"},"full":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-4.jpg","width":720,"height":1280,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4.jpg"}},"isPlaceholder":false,"isExternal":false,"needsProxy":false,"baseColor":"#e1d7e8","blurHash":"UNR2.BE.m+%0*0vybwoy_Nt6kXRkD4TLV?V[","creationDate":"2026-02-21T05:51:07","src":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-4.jpg","id":9243,"alt":"How QA engineers should evaluate generative AI models (4)","local":false},"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":48,"y":0,"width":330,"height":586,"mask":{"type":"rectangle"},"isBackground":true,"id":"f735ba28-d248-47e0-b20e-bbb61c177361","type":"image"},{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundTextMode":"NONE","font":{"family":"Roboto"},"fontSize":33,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"lineHeight":1.2,"textAlign":"left","padding":{"locked":true,"hasHiddenPadding":false,"horizontal":0,"vertical":0},"content":"<span style=\"font-weight: 700\">Multimodal Capability<\/span>","x":47,"y":579,"width":417,"borderRadius":{"locked":true,"topLeft":2,"topRight":2,"bottomRight":2,"bottomLeft":2},"height":39,"id":"ff1ca48e-91bf-4d21-81b8-e0e8d5f2da3c","type":"text"}],"backgroundColor":{"color":{"r":255,"g":255,"b":255}},"animations":[{"id":"32eee38e-e631-4a96-b033-307f3e9146d0","type":"effect-background-zoom","zoomDirection":"scaleOut","duration":2000,"delay":0,"targets":["f735ba28-d248-47e0-b20e-bbb61c177361"]},{"id":"de89f640-0350-4090-a7c2-5b9488b7670b","type":"effect-drop","duration":600,"delay":0,"targets":["ff1ca48e-91bf-4d21-81b8-e0e8d5f2da3c"]}],"id":"ee96912d-4f76-4bde-ba00-b76aab2edbe7","defaultBackgroundElement":{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":1,"y":1,"width":1,"height":1,"mask":{"type":"rectangle"},"isBackground":true,"isDefaultBackground":true,"type":"shape","id":"579fe7be-8186-4b89-aa2c-224545a3cebc"}},{"elements":[{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"scale":100,"focalX":50,"focalY":50,"resource":{"type":"image","mimeType":"image\/jpeg","width":720,"height":1280,"sizes":{"medium":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5-169x300.jpg","width":169,"height":300,"filesize":8289,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-169x300.jpg"},"large":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5-576x1024.jpg","width":576,"height":1024,"filesize":42453,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-576x1024.jpg"},"thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5-150x150.jpg","width":150,"height":150,"filesize":5975,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-150x150.jpg"},"rpg_gallery_admin_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5-150x150.jpg","width":150,"height":150,"filesize":5975,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-150x150.jpg"},"rpg_gallery_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5-150x150.jpg","width":150,"height":150,"filesize":5975,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-150x150.jpg"},"web-stories-poster-portrait":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5-640x853.jpg","width":640,"height":853,"filesize":46893,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-640x853.jpg"},"web-stories-publisher-logo":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5-96x96.jpg","width":96,"height":96,"filesize":3190,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-96x96.jpg"},"web-stories-thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5-150x267.jpg","width":150,"height":267,"filesize":6948,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5-150x267.jpg"},"full":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-5.jpg","width":720,"height":1280,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5.jpg"}},"isPlaceholder":false,"isExternal":false,"needsProxy":false,"baseColor":"#eae8eb","blurHash":"UORMb.O[=XX9x[sStRn$_MnNS5n$MxS5R5bv","creationDate":"2026-02-21T05:51:08","src":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-5.jpg","id":9244,"alt":"How QA engineers should evaluate generative AI models (5)","local":false},"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":48,"y":0,"width":330,"height":586,"mask":{"type":"rectangle"},"isBackground":true,"id":"189b6187-caab-4d12-a6a5-5fc6c5cb639a","type":"image"},{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundTextMode":"NONE","font":{"family":"Roboto"},"fontSize":33,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"lineHeight":1.2,"textAlign":"left","padding":{"locked":true,"hasHiddenPadding":false,"horizontal":0,"vertical":0},"content":"<span style=\"font-weight: 700\">Enterprise Deployment<\/span>","x":37,"y":579,"width":417,"borderRadius":{"locked":true,"topLeft":2,"topRight":2,"bottomRight":2,"bottomLeft":2},"height":39,"id":"c3c9266f-81e9-45bc-817a-c40d43435bdd","type":"text"}],"backgroundColor":{"color":{"r":255,"g":255,"b":255}},"animations":[{"id":"1ecbe877-c88d-446d-ad6d-35cec6b48208","type":"effect-background-zoom","zoomDirection":"scaleOut","duration":2000,"delay":0,"targets":["189b6187-caab-4d12-a6a5-5fc6c5cb639a"]},{"id":"68de6cdc-858f-463a-b001-020dac2090dd","type":"effect-drop","duration":600,"delay":0,"targets":["c3c9266f-81e9-45bc-817a-c40d43435bdd"]}],"id":"4589970d-7073-4c30-b698-40d00851071d","defaultBackgroundElement":{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":1,"y":1,"width":1,"height":1,"mask":{"type":"rectangle"},"isBackground":true,"isDefaultBackground":true,"type":"shape","id":"579fe7be-8186-4b89-aa2c-224545a3cebc"}},{"elements":[{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"scale":100,"focalX":50,"focalY":50,"resource":{"type":"image","mimeType":"image\/jpeg","width":720,"height":1280,"sizes":{"medium":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6-169x300.jpg","width":169,"height":300,"filesize":10327,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-169x300.jpg"},"large":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6-576x1024.jpg","width":576,"height":1024,"filesize":44265,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-576x1024.jpg"},"thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6-150x150.jpg","width":150,"height":150,"filesize":6380,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-150x150.jpg"},"rpg_gallery_admin_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6-150x150.jpg","width":150,"height":150,"filesize":6380,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-150x150.jpg"},"rpg_gallery_thumb":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6-150x150.jpg","width":150,"height":150,"filesize":6380,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-150x150.jpg"},"web-stories-poster-portrait":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6-640x853.jpg","width":640,"height":853,"filesize":43692,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-640x853.jpg"},"web-stories-publisher-logo":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6-96x96.jpg","width":96,"height":96,"filesize":3671,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-96x96.jpg"},"web-stories-thumbnail":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6-150x267.jpg","width":150,"height":267,"filesize":8823,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6-150x267.jpg"},"full":{"file":"How-QA-engineers-should-evaluate-generative-AI-models-6.jpg","width":720,"height":1280,"mimeType":"image\/jpeg","sourceUrl":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6.jpg"}},"isPlaceholder":false,"isExternal":false,"needsProxy":false,"baseColor":"#082960","blurHash":"UTONOUxt~qM{4:Rjt6kC_MM{IA%fShs.j]t7","creationDate":"2026-02-21T05:51:09","src":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/How-QA-engineers-should-evaluate-generative-AI-models-6.jpg","id":9245,"alt":"How QA engineers should evaluate generative AI models (6)","local":false},"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":48,"y":0,"width":330,"height":586,"mask":{"type":"rectangle"},"isBackground":true,"id":"ad43bf0b-fde4-4a24-8814-44c6ea598441","type":"image"},{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundTextMode":"NONE","font":{"family":"Roboto"},"fontSize":33,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"lineHeight":1.2,"textAlign":"left","padding":{"locked":true,"hasHiddenPadding":false,"horizontal":0,"vertical":0},"content":"<span style=\"font-weight: 700\">Agentic Capability<\/span>","x":57,"y":579,"width":417,"borderRadius":{"locked":true,"topLeft":2,"topRight":2,"bottomRight":2,"bottomLeft":2},"height":39,"id":"8f77c734-e0f9-4451-9d5f-a0de23c78173","type":"text"}],"backgroundColor":{"color":{"r":255,"g":255,"b":255}},"animations":[{"id":"61eaf549-514f-405f-a784-637e73eca0bd","type":"effect-background-zoom","zoomDirection":"scaleOut","duration":2000,"delay":0,"targets":["ad43bf0b-fde4-4a24-8814-44c6ea598441"]},{"id":"5e922d5e-0af5-4a70-9749-93b649c1590f","type":"effect-drop","duration":600,"delay":0,"targets":["8f77c734-e0f9-4451-9d5f-a0de23c78173"]}],"id":"40ab2452-9f27-4f8c-8b3b-cd52b5596996","defaultBackgroundElement":{"opacity":100,"flip":{"vertical":false,"horizontal":false},"rotationAngle":0,"lockAspectRatio":true,"backgroundColor":{"color":{"r":196,"g":196,"b":196}},"x":1,"y":1,"width":1,"height":1,"mask":{"type":"rectangle"},"isBackground":true,"isDefaultBackground":true,"type":"shape","id":"579fe7be-8186-4b89-aa2c-224545a3cebc"}}],"fonts":{"Roboto":{"family":"Roboto","weights":[100,300,400,500,700,900],"styles":["italic","regular"],"variants":[[0,100],[1,100],[0,300],[1,300],[0,400],[1,400],[0,500],[1,500],[0,700],[1,700],[0,900],[1,900]],"fallbacks":["Helvetica Neue","Helvetica","sans-serif"],"service":"fonts.google.com","metrics":{"upm":2048,"asc":1900,"des":-500,"tAsc":1536,"tDes":-512,"tLGap":102,"wAsc":1946,"wDes":512,"xH":1082,"capH":1456,"yMin":-555,"yMax":2163,"hAsc":1900,"hDes":-500,"lGap":0}}},"autoAdvance":true,"defaultPageDuration":7,"currentStoryStyles":{"colors":[]}},"story_poster":{"id":9250,"url":"https:\/\/www.testleaf.com\/blog\/wp-content\/uploads\/2026\/02\/cropped-How-QA-engineers-should-evaluate-generative-AI-models.jpg","width":640,"height":853,"needsProxy":false},"_links":{"self":[{"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/web-story\/9249","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/web-story"}],"about":[{"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/wp\/v2\/types\/web-story"}],"author":[{"embeddable":true,"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/users\/1"}],"version-history":[{"count":2,"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/web-story\/9249\/revisions"}],"predecessor-version":[{"id":9252,"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/web-story\/9249\/revisions\/9252"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/media\/9250"}],"wp:attachment":[{"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/media?parent=9249"}],"wp:term":[{"taxonomy":"web_story_category","embeddable":true,"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/web_story_category?post=9249"},{"taxonomy":"web_story_tag","embeddable":true,"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/web_story_tag?post=9249"}],"wp:lock":[{"embeddable":true,"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/web-story\/9249\/lock"}],"wp:publisherlogo":[{"embeddable":true,"href":"https:\/\/www.testleaf.com\/blog\/wp-json\/web-stories\/v1\/media\/161"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}