s3_iam_distributed_test.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. package iam
  2. import (
  3. "fmt"
  4. "os"
  5. "strings"
  6. "sync"
  7. "testing"
  8. "time"
  9. "github.com/aws/aws-sdk-go/aws"
  10. "github.com/aws/aws-sdk-go/service/s3"
  11. "github.com/stretchr/testify/assert"
  12. "github.com/stretchr/testify/require"
  13. )
  14. // TestS3IAMDistributedTests tests IAM functionality across multiple S3 gateway instances
  15. func TestS3IAMDistributedTests(t *testing.T) {
  16. // Skip if not in distributed test mode
  17. if os.Getenv("ENABLE_DISTRIBUTED_TESTS") != "true" {
  18. t.Skip("Distributed tests not enabled. Set ENABLE_DISTRIBUTED_TESTS=true")
  19. }
  20. framework := NewS3IAMTestFramework(t)
  21. defer framework.Cleanup()
  22. t.Run("distributed_session_consistency", func(t *testing.T) {
  23. // Test that sessions created on one instance are visible on others
  24. // This requires filer-based session storage
  25. // Create S3 clients that would connect to different gateway instances
  26. // In a real distributed setup, these would point to different S3 gateway ports
  27. client1, err := framework.CreateS3ClientWithJWT("test-user", "TestAdminRole")
  28. require.NoError(t, err)
  29. client2, err := framework.CreateS3ClientWithJWT("test-user", "TestAdminRole")
  30. require.NoError(t, err)
  31. // Both clients should be able to perform operations
  32. bucketName := "test-distributed-session"
  33. err = framework.CreateBucket(client1, bucketName)
  34. require.NoError(t, err)
  35. // Client2 should see the bucket created by client1
  36. listResult, err := client2.ListBuckets(&s3.ListBucketsInput{})
  37. require.NoError(t, err)
  38. found := false
  39. for _, bucket := range listResult.Buckets {
  40. if *bucket.Name == bucketName {
  41. found = true
  42. break
  43. }
  44. }
  45. assert.True(t, found, "Bucket should be visible across distributed instances")
  46. // Cleanup
  47. _, err = client1.DeleteBucket(&s3.DeleteBucketInput{
  48. Bucket: aws.String(bucketName),
  49. })
  50. require.NoError(t, err)
  51. })
  52. t.Run("distributed_role_consistency", func(t *testing.T) {
  53. // Test that role definitions are consistent across instances
  54. // This requires filer-based role storage
  55. // Create clients with different roles
  56. adminClient, err := framework.CreateS3ClientWithJWT("admin-user", "TestAdminRole")
  57. require.NoError(t, err)
  58. readOnlyClient, err := framework.CreateS3ClientWithJWT("readonly-user", "TestReadOnlyRole")
  59. require.NoError(t, err)
  60. bucketName := "test-distributed-roles"
  61. objectKey := "test-object.txt"
  62. // Admin should be able to create bucket
  63. err = framework.CreateBucket(adminClient, bucketName)
  64. require.NoError(t, err)
  65. // Admin should be able to put object
  66. err = framework.PutTestObject(adminClient, bucketName, objectKey, "test content")
  67. require.NoError(t, err)
  68. // Read-only user should be able to get object
  69. content, err := framework.GetTestObject(readOnlyClient, bucketName, objectKey)
  70. require.NoError(t, err)
  71. assert.Equal(t, "test content", content)
  72. // Read-only user should NOT be able to put object
  73. err = framework.PutTestObject(readOnlyClient, bucketName, "forbidden-object.txt", "forbidden content")
  74. require.Error(t, err, "Read-only user should not be able to put objects")
  75. // Cleanup
  76. err = framework.DeleteTestObject(adminClient, bucketName, objectKey)
  77. require.NoError(t, err)
  78. _, err = adminClient.DeleteBucket(&s3.DeleteBucketInput{
  79. Bucket: aws.String(bucketName),
  80. })
  81. require.NoError(t, err)
  82. })
  83. t.Run("distributed_concurrent_operations", func(t *testing.T) {
  84. // Test concurrent operations across distributed instances with robust retry mechanisms
  85. // This approach implements proper retry logic instead of tolerating errors to catch real concurrency issues
  86. const numGoroutines = 3 // Reduced concurrency for better CI reliability
  87. const numOperationsPerGoroutine = 2 // Minimal operations per goroutine
  88. const maxRetries = 3 // Maximum retry attempts for transient failures
  89. const retryDelay = 200 * time.Millisecond // Increased delay for better stability
  90. var wg sync.WaitGroup
  91. errors := make(chan error, numGoroutines*numOperationsPerGoroutine)
  92. // Helper function to determine if an error is retryable
  93. isRetryableError := func(err error) bool {
  94. if err == nil {
  95. return false
  96. }
  97. errorMsg := err.Error()
  98. return strings.Contains(errorMsg, "timeout") ||
  99. strings.Contains(errorMsg, "connection reset") ||
  100. strings.Contains(errorMsg, "temporary failure") ||
  101. strings.Contains(errorMsg, "TooManyRequests") ||
  102. strings.Contains(errorMsg, "ServiceUnavailable") ||
  103. strings.Contains(errorMsg, "InternalError")
  104. }
  105. // Helper function to execute operations with retry logic
  106. executeWithRetry := func(operation func() error, operationName string) error {
  107. var lastErr error
  108. for attempt := 0; attempt <= maxRetries; attempt++ {
  109. if attempt > 0 {
  110. time.Sleep(retryDelay * time.Duration(attempt)) // Linear backoff
  111. }
  112. lastErr = operation()
  113. if lastErr == nil {
  114. return nil // Success
  115. }
  116. if !isRetryableError(lastErr) {
  117. // Non-retryable error - fail immediately
  118. return fmt.Errorf("%s failed with non-retryable error: %w", operationName, lastErr)
  119. }
  120. // Retryable error - continue to next attempt
  121. if attempt < maxRetries {
  122. t.Logf("Retrying %s (attempt %d/%d) after error: %v", operationName, attempt+1, maxRetries, lastErr)
  123. }
  124. }
  125. // All retries exhausted
  126. return fmt.Errorf("%s failed after %d retries, last error: %w", operationName, maxRetries, lastErr)
  127. }
  128. for i := 0; i < numGoroutines; i++ {
  129. wg.Add(1)
  130. go func(goroutineID int) {
  131. defer wg.Done()
  132. client, err := framework.CreateS3ClientWithJWT(fmt.Sprintf("user-%d", goroutineID), "TestAdminRole")
  133. if err != nil {
  134. errors <- fmt.Errorf("failed to create S3 client for goroutine %d: %w", goroutineID, err)
  135. return
  136. }
  137. for j := 0; j < numOperationsPerGoroutine; j++ {
  138. bucketName := fmt.Sprintf("test-concurrent-%d-%d", goroutineID, j)
  139. objectKey := "test-object.txt"
  140. objectContent := fmt.Sprintf("content-%d-%d", goroutineID, j)
  141. // Execute full operation sequence with individual retries
  142. operationFailed := false
  143. // 1. Create bucket with retry
  144. if err := executeWithRetry(func() error {
  145. return framework.CreateBucket(client, bucketName)
  146. }, fmt.Sprintf("CreateBucket-%s", bucketName)); err != nil {
  147. errors <- err
  148. operationFailed = true
  149. }
  150. if !operationFailed {
  151. // 2. Put object with retry
  152. if err := executeWithRetry(func() error {
  153. return framework.PutTestObject(client, bucketName, objectKey, objectContent)
  154. }, fmt.Sprintf("PutObject-%s/%s", bucketName, objectKey)); err != nil {
  155. errors <- err
  156. operationFailed = true
  157. }
  158. }
  159. if !operationFailed {
  160. // 3. Get object with retry
  161. if err := executeWithRetry(func() error {
  162. _, err := framework.GetTestObject(client, bucketName, objectKey)
  163. return err
  164. }, fmt.Sprintf("GetObject-%s/%s", bucketName, objectKey)); err != nil {
  165. errors <- err
  166. operationFailed = true
  167. }
  168. }
  169. if !operationFailed {
  170. // 4. Delete object with retry
  171. if err := executeWithRetry(func() error {
  172. return framework.DeleteTestObject(client, bucketName, objectKey)
  173. }, fmt.Sprintf("DeleteObject-%s/%s", bucketName, objectKey)); err != nil {
  174. errors <- err
  175. operationFailed = true
  176. }
  177. }
  178. // 5. Always attempt bucket cleanup, even if previous operations failed
  179. if err := executeWithRetry(func() error {
  180. _, err := client.DeleteBucket(&s3.DeleteBucketInput{
  181. Bucket: aws.String(bucketName),
  182. })
  183. return err
  184. }, fmt.Sprintf("DeleteBucket-%s", bucketName)); err != nil {
  185. // Only log cleanup failures, don't fail the test
  186. t.Logf("Warning: Failed to cleanup bucket %s: %v", bucketName, err)
  187. }
  188. // Increased delay between operation sequences to reduce server load and improve stability
  189. time.Sleep(100 * time.Millisecond)
  190. }
  191. }(i)
  192. }
  193. wg.Wait()
  194. close(errors)
  195. // Collect and analyze errors - with retry logic, we should see very few errors
  196. var errorList []error
  197. for err := range errors {
  198. errorList = append(errorList, err)
  199. }
  200. totalOperations := numGoroutines * numOperationsPerGoroutine
  201. // Report results
  202. if len(errorList) == 0 {
  203. t.Logf("🎉 All %d concurrent operations completed successfully with retry mechanisms!", totalOperations)
  204. } else {
  205. t.Logf("Concurrent operations summary:")
  206. t.Logf(" Total operations: %d", totalOperations)
  207. t.Logf(" Failed operations: %d (%.1f%% error rate)", len(errorList), float64(len(errorList))/float64(totalOperations)*100)
  208. // Log first few errors for debugging
  209. for i, err := range errorList {
  210. if i >= 3 { // Limit to first 3 errors
  211. t.Logf(" ... and %d more errors", len(errorList)-3)
  212. break
  213. }
  214. t.Logf(" Error %d: %v", i+1, err)
  215. }
  216. }
  217. // With proper retry mechanisms, we should expect near-zero failures
  218. // Any remaining errors likely indicate real concurrency issues or system problems
  219. if len(errorList) > 0 {
  220. t.Errorf("❌ %d operation(s) failed even after retry mechanisms (%.1f%% failure rate). This indicates potential system issues or race conditions that need investigation.",
  221. len(errorList), float64(len(errorList))/float64(totalOperations)*100)
  222. }
  223. })
  224. }
  225. // TestS3IAMPerformanceTests tests IAM performance characteristics
  226. func TestS3IAMPerformanceTests(t *testing.T) {
  227. // Skip if not in performance test mode
  228. if os.Getenv("ENABLE_PERFORMANCE_TESTS") != "true" {
  229. t.Skip("Performance tests not enabled. Set ENABLE_PERFORMANCE_TESTS=true")
  230. }
  231. framework := NewS3IAMTestFramework(t)
  232. defer framework.Cleanup()
  233. t.Run("authentication_performance", func(t *testing.T) {
  234. // Test authentication performance
  235. const numRequests = 100
  236. client, err := framework.CreateS3ClientWithJWT("perf-user", "TestAdminRole")
  237. require.NoError(t, err)
  238. bucketName := "test-auth-performance"
  239. err = framework.CreateBucket(client, bucketName)
  240. require.NoError(t, err)
  241. defer func() {
  242. _, err := client.DeleteBucket(&s3.DeleteBucketInput{
  243. Bucket: aws.String(bucketName),
  244. })
  245. require.NoError(t, err)
  246. }()
  247. start := time.Now()
  248. for i := 0; i < numRequests; i++ {
  249. _, err := client.ListBuckets(&s3.ListBucketsInput{})
  250. require.NoError(t, err)
  251. }
  252. duration := time.Since(start)
  253. avgLatency := duration / numRequests
  254. t.Logf("Authentication performance: %d requests in %v (avg: %v per request)",
  255. numRequests, duration, avgLatency)
  256. // Performance assertion - should be under 100ms per request on average
  257. assert.Less(t, avgLatency, 100*time.Millisecond,
  258. "Average authentication latency should be under 100ms")
  259. })
  260. t.Run("authorization_performance", func(t *testing.T) {
  261. // Test authorization performance with different policy complexities
  262. const numRequests = 50
  263. client, err := framework.CreateS3ClientWithJWT("perf-user", "TestAdminRole")
  264. require.NoError(t, err)
  265. bucketName := "test-authz-performance"
  266. err = framework.CreateBucket(client, bucketName)
  267. require.NoError(t, err)
  268. defer func() {
  269. _, err := client.DeleteBucket(&s3.DeleteBucketInput{
  270. Bucket: aws.String(bucketName),
  271. })
  272. require.NoError(t, err)
  273. }()
  274. start := time.Now()
  275. for i := 0; i < numRequests; i++ {
  276. objectKey := fmt.Sprintf("perf-object-%d.txt", i)
  277. err := framework.PutTestObject(client, bucketName, objectKey, "performance test content")
  278. require.NoError(t, err)
  279. _, err = framework.GetTestObject(client, bucketName, objectKey)
  280. require.NoError(t, err)
  281. err = framework.DeleteTestObject(client, bucketName, objectKey)
  282. require.NoError(t, err)
  283. }
  284. duration := time.Since(start)
  285. avgLatency := duration / (numRequests * 3) // 3 operations per iteration
  286. t.Logf("Authorization performance: %d operations in %v (avg: %v per operation)",
  287. numRequests*3, duration, avgLatency)
  288. // Performance assertion - should be under 50ms per operation on average
  289. assert.Less(t, avgLatency, 50*time.Millisecond,
  290. "Average authorization latency should be under 50ms")
  291. })
  292. }
  293. // BenchmarkS3IAMAuthentication benchmarks JWT authentication
  294. func BenchmarkS3IAMAuthentication(b *testing.B) {
  295. if os.Getenv("ENABLE_PERFORMANCE_TESTS") != "true" {
  296. b.Skip("Performance tests not enabled. Set ENABLE_PERFORMANCE_TESTS=true")
  297. }
  298. framework := NewS3IAMTestFramework(&testing.T{})
  299. defer framework.Cleanup()
  300. client, err := framework.CreateS3ClientWithJWT("bench-user", "TestAdminRole")
  301. require.NoError(b, err)
  302. bucketName := "test-bench-auth"
  303. err = framework.CreateBucket(client, bucketName)
  304. require.NoError(b, err)
  305. defer func() {
  306. _, err := client.DeleteBucket(&s3.DeleteBucketInput{
  307. Bucket: aws.String(bucketName),
  308. })
  309. require.NoError(b, err)
  310. }()
  311. b.ResetTimer()
  312. b.RunParallel(func(pb *testing.PB) {
  313. for pb.Next() {
  314. _, err := client.ListBuckets(&s3.ListBucketsInput{})
  315. if err != nil {
  316. b.Error(err)
  317. }
  318. }
  319. })
  320. }
  321. // BenchmarkS3IAMAuthorization benchmarks policy evaluation
  322. func BenchmarkS3IAMAuthorization(b *testing.B) {
  323. if os.Getenv("ENABLE_PERFORMANCE_TESTS") != "true" {
  324. b.Skip("Performance tests not enabled. Set ENABLE_PERFORMANCE_TESTS=true")
  325. }
  326. framework := NewS3IAMTestFramework(&testing.T{})
  327. defer framework.Cleanup()
  328. client, err := framework.CreateS3ClientWithJWT("bench-user", "TestAdminRole")
  329. require.NoError(b, err)
  330. bucketName := "test-bench-authz"
  331. err = framework.CreateBucket(client, bucketName)
  332. require.NoError(b, err)
  333. defer func() {
  334. _, err := client.DeleteBucket(&s3.DeleteBucketInput{
  335. Bucket: aws.String(bucketName),
  336. })
  337. require.NoError(b, err)
  338. }()
  339. b.ResetTimer()
  340. b.RunParallel(func(pb *testing.PB) {
  341. i := 0
  342. for pb.Next() {
  343. objectKey := fmt.Sprintf("bench-object-%d.txt", i)
  344. err := framework.PutTestObject(client, bucketName, objectKey, "benchmark content")
  345. if err != nil {
  346. b.Error(err)
  347. }
  348. i++
  349. }
  350. })
  351. }