( ! ) Deprecated: strip_tags(): Passing null to parameter #1 ($string) of type string is deprecated in /home/jlahijani/Sites/goodui.org/site/templates/_og.php on line 13 Call Stack #TimeMemoryFunctionLocation 10.0001465800{main}( ).../index.php:0 20.26352254824ProcessWire\Wire->__call( $method = 'execute', $arguments = [0 => TRUE] ).../index.php:55 30.26352254824ProcessWire\WireHooks->runHooks( $object = class ProcessWire\ProcessPageView { }, $method = 'execute', $arguments = [0 => TRUE], $type = ??? ).../Wire.php:484 40.26352255240ProcessWire\Wire->_callMethod( $method = '___execute', $arguments = [0 => TRUE] ).../WireHooks.php:952 50.26352255240ProcessWire\ProcessPageView->___execute( $internal = TRUE ).../Wire.php:416 60.26902295640ProcessWire\ProcessPageView->renderPage( $page = class ProcessWire\Page { public $id = 3170; public $name = 'simulating-how-long-i-should-run-my-test'; public $parent = '/blog/'; public $template = 'post'; public $title = 'Simulating How Long To Run Your Test'; public $post_tags = '(PageArray) 3143'; public $body_blocks = '(RepeaterMatrixPageArray) 6581'; public $date = 1461098916; public $comments = '(CommentArray) 472|471'; public $summary = ''; public $body = ''; public $data = ['title' => 'Simulating How Long To Run Your Test', 'post_tags' => class ProcessWire\PageArray { ... }, 'body_blocks' => class ProcessWire\RepeaterMatrixPageArray { ... }, 'date' => 1461098916, 'comments' => class ProcessWire\CommentArray { ... 
}, 'summary' => '', 'body' => ''] }, $request = class ProcessWire\PagesRequest { } ).../ProcessPageView.module:114 70.27942325776ProcessWire\Wire->__call( $method = 'render', $arguments = [] ).../ProcessPageView.module:184 80.27942325776ProcessWire\WireHooks->runHooks( $object = class ProcessWire\Page { public $id = 3170; public $name = 'simulating-how-long-i-should-run-my-test'; public $parent = '/blog/'; public $template = 'post'; public $title = 'Simulating How Long To Run Your Test'; public $post_tags = '(PageArray) 3143'; public $body_blocks = '(RepeaterMatrixPageArray) 6581'; public $date = 1461098916; public $comments = '(CommentArray) 472|471'; public $summary = ''; public $body = ''; public $data = ['title' => 'Simulating How Long To Run Your Test', 'post_tags' => class ProcessWire\PageArray { ... }, 'body_blocks' => class ProcessWire\RepeaterMatrixPageArray { ... }, 'date' => 1461098916, 'comments' => class ProcessWire\CommentArray { ... }, 'summary' => '', 'body' => ''] }, $method = 'render', $arguments = [], $type = ??? ).../Wire.php:484 90.27942327736ProcessWire\Wire->__call( $method = 'renderPage', $arguments = [0 => class ProcessWire\HookEvent { public $data = [...] }] ).../WireHooks.php:1060 100.27942327736ProcessWire\WireHooks->runHooks( $object = class ProcessWire\PageRender { public $data = ['clearCache' => 1] }, $method = 'renderPage', $arguments = [0 => class ProcessWire\HookEvent { public $data = [...] }], $type = ??? ).../Wire.php:484 110.27952328152ProcessWire\Wire->_callMethod( $method = '___renderPage', $arguments = [0 => class ProcessWire\HookEvent { public $data = [...] }] ).../WireHooks.php:952 120.27952328152ProcessWire\PageRender->___renderPage( $event = class ProcessWire\HookEvent { public $data = ['object' => class ProcessWire\Page { ... 
}, 'method' => 'render', 'arguments' => [...], 'return' => NULL, 'replace' => FALSE, 'options' => [...], 'id' => 'Page:100.0:render', 'cancelHooks' => FALSE, 'when' => 'after'] } ).../Wire.php:416 130.28012328704ProcessWire\Wire->__call( $method = 'render', $arguments = [] ).../PageRender.module:575 140.28012328704ProcessWire\WireHooks->runHooks( $object = class ProcessWire\TemplateFile { public $data = ['wire' => class ProcessWire\ProcessWire { ... }, 'classLoader' => class ProcessWire\WireClassLoader { ... }, 'hooks' => class ProcessWire\WireHooks { ... }, 'config' => class ProcessWire\Config { ... }, 'notices' => class ProcessWire\Notices { ... }, 'urls' => class ProcessWire\Paths { ... }, 'log' => class ProcessWire\WireLog { ... }, 'sanitizer' => class ProcessWire\Sanitizer { ... }, 'datetime' => class ProcessWire\WireDateTime { ... }, 'files' => class ProcessWire\WireFileTools { ... }, 'mail' => class ProcessWire\WireMailTools { ... }, 'database' => class ProcessWire\WireDatabasePDO { ... }, 'db' => class ProcessWire\DatabaseMysqli { ... }, 'cache' => class ProcessWire\WireCache { ... }, 'modules' => class ProcessWire\Modules { ... }, 'procache' => class ProcessWire\ProCache { ... }, 'fieldtypes' => class ProcessWire\Fieldtypes { ... }, 'fields' => class ProcessWire\Fields { ... }, 'fieldgroups' => class ProcessWire\Fieldgroups { ... }, 'templates' => class ProcessWire\Templates { ... }, 'pages' => class ProcessWire\Pages { ... }, 'permissions' => class ProcessWire\Permissions { ... }, 'roles' => class ProcessWire\Roles { ... }, 'users' => class ProcessWire\Users { ... }, 'user' => class ProcessWire\User { ... }, 'session' => class ProcessWire\Session { ... }, 'input' => class ProcessWire\WireInput { ... }, 'buster' => class ProcessWire\ProCacheBuster { ... }, 'process' => class ProcessWire\ProcessPageView { ... }, 'page' => class ProcessWire\Page { ... }, 'options' => [...]] }, $method = 'render', $arguments = [], $type = ??? 
).../Wire.php:484 150.28012330056ProcessWire\Wire->_callMethod( $method = '___render', $arguments = [] ).../WireHooks.php:952 160.28012330056ProcessWire\TemplateFile->___render( ).../Wire.php:413 170.39173024472require( '/home/jlahijani/Sites/goodui.org/site/templates/_main.php ).../TemplateFile.php:340 180.39293067784include( '/home/jlahijani/Sites/goodui.org/site/templates/_og.php ).../_main.php:64 190.39493068520strip_tags( $string = NULL ).../_og.php:13 " />

Simulating How Long To Run Your Test

How much time is enough for the true performance of your variations to come through the noise?

In this video, we'll see a simulation of an A/A/B/C/D test as it moves from the initial state dominated by chance towards a state of equilibrium. In the process, we observe how the performance of variations can change over time due to chance alone and what sorts of intermediate outcomes we can expect. How does a false positive tend to behave over time? What is a true +10% winner likely to do halfway into the test? Answering these questions helps me interpret real tests.

To speed things up, this simulation is based on a 20% baseline conversion rate and 1,000 visitor hits per day. The duration of 10 days is just an example. In your real tests, the conversion rate might be as low as 1%, which means it would take far longer to get to a similar equilibrium.

Exercise:

  1. Use Evan Miller's Sample Size Calculator to calculate the sample size needed to detect a 10% relative lift over a 20% baseline, leaving the power and significance at their defaults (the answer is at the bottom of this post).
  2. Rewatch the simulation video and see how the test behaves as it approaches this sample size target.
  3. Consider: How accurate is the relative performance of each variation at this point? What sort of outcomes are still possible by chance alone that would obscure the true performance of the variations? Based on this simulation, would you run your test for longer or shorter than this target?

Do you use simulations for planning and analysis? Share with us.

(Answer: 6,347 visitors per variation)




Comments

  • Gavin Morrice

    Gavin Morrice 9 years ago 10

    I just built a small Ruby program that simulates this; the results are pretty mind-blowing.
    It takes several thousand samples before you get an accurate result with split A/B testing. Even more if you're using something like an epsilon-greedy algorithm.

    Have a play around with it; the code is on GitHub:
    https://github.com/Bodacious/OptimisationTestComparisons/

    • Jakub Linowski

      Jakub Linowski 9 years ago 00

      Hey Gavin. Awesomeness. These simulations are nice in that they visualize false positives and false negatives. Agreed. And yes, it usually takes thousands of visitors for the patterns to stabilize. I think the key factors are: sample size, baseline conversion rate, and the magnitude of effect.

      Unfortunately, I don't know Ruby to run your script :(
      Cheers,
      J