test-metadata-optimization.js 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #!/usr/bin/env node
  2. /**
  3. * Benchmark: Optimized metadata extraction vs Full metadata extraction
  4. * Compares performance of minimal field extraction vs comprehensive extraction
  5. */
  6. const { spawn } = require('child_process');
  7. const path = require('path');
  8. // Test URLs
  9. const testUrls = [
  10. 'https://www.youtube.com/watch?v=jNQXAC9IVRw',
  11. 'https://www.youtube.com/watch?v=dQw4w9WgXcQ',
  12. 'https://www.youtube.com/watch?v=9bZkp7q19f0',
  13. 'https://www.youtube.com/watch?v=_OBlgSz8sSM'
  14. ];
  15. function getBinaryPath(name) {
  16. const ext = process.platform === 'win32' ? '.exe' : '';
  17. return path.join(__dirname, 'binaries', `${name}${ext}`);
  18. }
  19. function runCommand(command, args) {
  20. return new Promise((resolve, reject) => {
  21. const process = spawn(command, args);
  22. let output = '';
  23. let error = '';
  24. process.stdout.on('data', (data) => {
  25. output += data.toString();
  26. });
  27. process.stderr.on('data', (data) => {
  28. error += data.toString();
  29. });
  30. process.on('close', (code) => {
  31. if (code === 0) {
  32. resolve(output);
  33. } else {
  34. reject(new Error(error));
  35. }
  36. });
  37. });
  38. }
  39. // OLD METHOD: Full dump-json extraction (10+ fields)
  40. async function testFullExtraction(urls) {
  41. console.log('\n📦 Testing FULL METADATA extraction (--dump-json)...');
  42. const ytDlpPath = getBinaryPath('yt-dlp');
  43. const startTime = Date.now();
  44. const results = [];
  45. for (const url of urls) {
  46. try {
  47. const args = [
  48. '--dump-json',
  49. '--no-warnings',
  50. '--skip-download',
  51. '--ignore-errors',
  52. '--extractor-args', 'youtube:skip=hls,dash',
  53. url
  54. ];
  55. const output = await runCommand(ytDlpPath, args);
  56. const metadata = JSON.parse(output);
  57. results.push({
  58. url,
  59. title: metadata.title,
  60. duration: metadata.duration,
  61. thumbnail: metadata.thumbnail,
  62. // Plus 7 unused fields: uploader, uploadDate, viewCount, description,
  63. // availableQualities, filesize, platform
  64. });
  65. } catch (error) {
  66. console.error(` ❌ Failed to fetch ${url}`);
  67. }
  68. }
  69. const duration = Date.now() - startTime;
  70. const avgTime = duration / urls.length;
  71. console.log(` ✅ Fetched ${results.length}/${urls.length} videos`);
  72. console.log(` ⏱️ Total time: ${duration}ms`);
  73. console.log(` 📊 Average per video: ${avgTime.toFixed(1)}ms`);
  74. return { results, duration, avgTime };
  75. }
  76. // NEW METHOD: Optimized minimal extraction (3 fields only)
  77. async function testOptimizedExtraction(urls) {
  78. console.log('\n⚡ Testing OPTIMIZED METADATA extraction (--print)...');
  79. const ytDlpPath = getBinaryPath('yt-dlp');
  80. const startTime = Date.now();
  81. const results = [];
  82. for (const url of urls) {
  83. try {
  84. const args = [
  85. '--print', '%(title)s|||%(duration)s|||%(thumbnail)s',
  86. '--no-warnings',
  87. '--skip-download',
  88. '--playlist-items', '1',
  89. '--no-playlist',
  90. url
  91. ];
  92. const output = await runCommand(ytDlpPath, args);
  93. const parts = output.trim().split('|||');
  94. results.push({
  95. url,
  96. title: parts[0] || 'Unknown Title',
  97. duration: parseInt(parts[1]) || 0,
  98. thumbnail: parts[2] || null
  99. });
  100. } catch (error) {
  101. console.error(` ❌ Failed to fetch ${url}`);
  102. }
  103. }
  104. const duration = Date.now() - startTime;
  105. const avgTime = duration / urls.length;
  106. console.log(` ✅ Fetched ${results.length}/${urls.length} videos`);
  107. console.log(` ⏱️ Total time: ${duration}ms`);
  108. console.log(` 📊 Average per video: ${avgTime.toFixed(1)}ms`);
  109. return { results, duration, avgTime };
  110. }
  111. // BATCH METHOD: Optimized batch extraction
  112. async function testBatchOptimized(urls) {
  113. console.log('\n🚀 Testing BATCH OPTIMIZED extraction...');
  114. const ytDlpPath = getBinaryPath('yt-dlp');
  115. const startTime = Date.now();
  116. try {
  117. const args = [
  118. '--print', '%(webpage_url)s|||%(title)s|||%(duration)s|||%(thumbnail)s',
  119. '--no-warnings',
  120. '--skip-download',
  121. '--ignore-errors',
  122. '--playlist-items', '1',
  123. '--no-playlist',
  124. ...urls
  125. ];
  126. const output = await runCommand(ytDlpPath, args);
  127. const lines = output.trim().split('\n');
  128. const results = [];
  129. for (const line of lines) {
  130. if (!line.trim()) continue;
  131. const parts = line.split('|||');
  132. if (parts.length >= 4) {
  133. results.push({
  134. url: parts[0],
  135. title: parts[1] || 'Unknown Title',
  136. duration: parseInt(parts[2]) || 0,
  137. thumbnail: parts[3] || null
  138. });
  139. }
  140. }
  141. const duration = Date.now() - startTime;
  142. const avgTime = duration / urls.length;
  143. console.log(` ✅ Fetched ${results.length}/${urls.length} videos`);
  144. console.log(` ⏱️ Total time: ${duration}ms`);
  145. console.log(` 📊 Average per video: ${avgTime.toFixed(1)}ms`);
  146. return { results, duration, avgTime };
  147. } catch (error) {
  148. console.error(' ❌ Batch fetch failed:', error.message);
  149. return { results: [], duration: 0, avgTime: 0 };
  150. }
  151. }
  152. async function main() {
  153. console.log('🧪 Metadata Extraction Performance Benchmark');
  154. console.log('============================================\n');
  155. console.log(`Testing with ${testUrls.length} YouTube URLs...\n`);
  156. try {
  157. // Test all methods
  158. const fullMethod = await testFullExtraction(testUrls);
  159. const optimizedMethod = await testOptimizedExtraction(testUrls);
  160. const batchMethod = await testBatchOptimized(testUrls);
  161. // Compare results
  162. console.log('\n📈 Performance Comparison:');
  163. console.log('==========================');
  164. const speedupOptimized = ((fullMethod.duration - optimizedMethod.duration) / fullMethod.duration * 100).toFixed(1);
  165. const timesFasterOptimized = (fullMethod.duration / optimizedMethod.duration).toFixed(2);
  166. const speedupBatch = ((fullMethod.duration - batchMethod.duration) / fullMethod.duration * 100).toFixed(1);
  167. const timesFasterBatch = (fullMethod.duration / batchMethod.duration).toFixed(2);
  168. console.log(`\nFull (dump-json): ${fullMethod.duration}ms total (${fullMethod.avgTime.toFixed(1)}ms avg)`);
  169. console.log(`Optimized (--print): ${optimizedMethod.duration}ms total (${optimizedMethod.avgTime.toFixed(1)}ms avg)`);
  170. console.log(`Batch Optimized: ${batchMethod.duration}ms total (${batchMethod.avgTime.toFixed(1)}ms avg)`);
  171. console.log(`\n🎉 Optimized is ${speedupOptimized}% faster than Full (${timesFasterOptimized}x speedup)!`);
  172. console.log(`🚀 Batch Optimized is ${speedupBatch}% faster than Full (${timesFasterBatch}x speedup)!`);
  173. // Memory savings
  174. const fieldsOld = 10;
  175. const fieldsNew = 3;
  176. const memorySavings = ((fieldsOld - fieldsNew) / fieldsOld * 100).toFixed(1);
  177. console.log(`\n💾 Memory Benefits:`);
  178. console.log(` Extracted fields reduced: ${fieldsOld} → ${fieldsNew} (${memorySavings}% less data)`);
  179. console.log(` No JSON parsing overhead`);
  180. console.log(` No format list extraction (biggest bottleneck eliminated)`);
  181. console.log('\n✅ Benchmark complete!');
  182. } catch (error) {
  183. console.error('❌ Benchmark failed:', error);
  184. process.exit(1);
  185. }
  186. }
  187. main();