diff --git a/examples/cassie_functions/README.md b/examples/cassie_functions/README.md new file mode 100644 index 0000000..c33c6ee --- /dev/null +++ b/examples/cassie_functions/README.md @@ -0,0 +1,91 @@ +# Cassie Functions - Example Usage Scripts + +This directory contains minimal example scripts demonstrating how to call each function that was migrated from `pycassa` to the `cassandra` ORM in `src/errors/cassie.py`. + +## Purpose + +These scripts provide: +- Clear examples of function signatures and parameters +- Sample input data for each function +- Basic usage patterns + +## Important Notes + +⚠️ **These are example scripts only** - They demonstrate the API but won't run successfully without: +- A properly configured Cassandra database connection (configured via `errortracker.config`) +- Valid data in the database +- Required dependencies installed (cassandra-driver, numpy, etc.) + +Each script includes a call to `setup_cassandra()`, which initializes the Cassandra connection before any of the functions are used. This function: +- Sets up the database connection using credentials from the configuration +- Synchronizes the database schema +- Ensures the connection is ready for queries + +## Structure + +Each file corresponds to one function in `cassie.py`: +- `get_total_buckets_by_day.py` - Example for `get_total_buckets_by_day()` +- `get_bucket_counts.py` - Example for `get_bucket_counts()` +- `get_crashes_for_bucket.py` - Example for `get_crashes_for_bucket()` +- And so on... + +## Usage + +To understand how to use a specific function: + +1. Open the corresponding `.py` file +2. Review the function call with example parameters +3. Adapt the parameters to your use case + +Example: +```bash +# View the example (won't execute without DB connection) +cat get_bucket_counts.py +``` + +## Functions Included + +All functions migrated from pycassa to cassandra ORM: + +### Bucket Operations +- `get_total_buckets_by_day` - Get bucket counts by day +- `get_bucket_counts` - Get bucket counts with filtering +- `get_crashes_for_bucket` - Get crashes for a specific bucket +- `get_package_for_bucket` - Get package info for bucket +- `get_metadata_for_bucket` - Get metadata for bucket +- `get_metadata_for_buckets` - Get metadata for multiple buckets +- `get_versions_for_bucket` - Get versions for bucket +- `get_source_package_for_bucket` - Get source package +- `get_retrace_failure_for_bucket` - Get retrace failure info +- `get_traceback_for_bucket` - Get traceback for bucket +- `get_stacktrace_for_bucket` - Get stacktrace for bucket +- `bucket_exists` - Check if bucket exists + +### Crash Operations +- `get_crash` - Get crash details +- `get_crash_count` - Get crash counts over time +- `get_user_crashes` - Get crashes for a user +- `get_average_crashes` - Get average crashes per user +- `get_average_instances` - Get average instances for bucket + +### Package Operations +- `get_package_crash_rate` - Analyze package crash rates +- `get_package_new_buckets` - Get new buckets for package version +- `get_binary_packages_for_user` - Get user's packages + +### Retracer Operations +- `get_retracer_count` - Get retracer count for date +- `get_retracer_counts` - Get retracer counts over time +- `get_retracer_means` - Get mean retracing times + +### Bug/Signature Operations +- `record_bug_for_bucket` - Record a bug for bucket +- `get_signatures_for_bug` - Get signatures for bug +- `get_problem_for_hash` - Get problem for hash + +### System Image Operations +- `get_system_image_versions` - Get system image versions +
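+## Typical Script Pattern + +Every script follows the same skeleton, so adapting one to a different function is mostly a matter of swapping the import and the call. A minimal sketch, taken from `bucket_exists.py` (substitute the function and arguments you need): + +```python +#!/usr/bin/env python3 +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import bucket_exists + +# Initialize the Cassandra connection before any query +setup_cassandra() + +bucketid = "/bin/zsh:11:makezleparams:execzlefunc:redrawhook:zlecore:zleread" +print(f"Bucket exists: {bucket_exists(bucketid)}") +``` +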
+## Migration Notes + +These functions were migrated from the deprecated `pycassa` library to the modern `cassandra-driver` ORM while maintaining backward compatibility. diff --git a/examples/cassie_functions/bucket_exists.py b/examples/cassie_functions/bucket_exists.py new file mode 100644 index 0000000..dc358d9 --- /dev/null +++ b/examples/cassie_functions/bucket_exists.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +"""Example usage of bucket_exists function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import bucket_exists + +# Setup Cassandra connection +setup_cassandra() + +# Example: Check if a bucket exists +bucketid = "/bin/zsh:11:makezleparams:execzlefunc:redrawhook:zlecore:zleread" + +exists = bucket_exists(bucketid) +print(f"Bucket {bucketid} exists: {exists}") diff --git a/examples/cassie_functions/get_average_crashes.py b/examples/cassie_functions/get_average_crashes.py new file mode 100644 index 0000000..4a6a90e --- /dev/null +++ b/examples/cassie_functions/get_average_crashes.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +"""Example usage of get_average_crashes function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_average_crashes + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get average crashes per user +field = "zsh:5.9-6ubuntu2" +release = "Ubuntu 24.04" +days = 14 + +data = get_average_crashes(field, release, days=days) +print(f"Average crash data: {data}") +for timestamp, avg in data: + print(f"Timestamp: {timestamp}, Average: {avg}") diff --git a/examples/cassie_functions/get_average_instances.py b/examples/cassie_functions/get_average_instances.py new file mode 100644 index 0000000..931efbd --- /dev/null +++ b/examples/cassie_functions/get_average_instances.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +"""Example usage of get_average_instances function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_average_instances + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get average instances for a bucket +bucketid = "/bin/zsh:11:makezleparams:execzlefunc:redrawhook:zlecore:zleread" +release = "Ubuntu 24.04" +days = 7 + +for timestamp, avg in get_average_instances(bucketid, release, days=days): + print(f"Timestamp: {timestamp}, Average: {avg}") diff --git a/examples/cassie_functions/get_binary_packages_for_user.py b/examples/cassie_functions/get_binary_packages_for_user.py new file mode 100644 index 0000000..abafbe9 --- /dev/null +++ b/examples/cassie_functions/get_binary_packages_for_user.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +"""Example usage of get_binary_packages_for_user function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_binary_packages_for_user + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get binary packages for a user +user = "foundations-bugs" # quite slow (~1m56s) +user = "xubuntu-bugs" # way faster (~12s) + +packages = get_binary_packages_for_user(user) +if packages: + print(f"Found {len(packages)} packages") + for package in packages: + print(f"Package: {package}") +else: + print("No packages found") diff --git a/examples/cassie_functions/get_bucket_counts.py b/examples/cassie_functions/get_bucket_counts.py new file mode 100644 index 0000000..68ba2ae --- /dev/null 
+++ b/examples/cassie_functions/get_bucket_counts.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +"""Example usage of get_bucket_counts function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_bucket_counts + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get bucket counts for Ubuntu 24.04 today +print("Ubuntu 24.04 - today") +result = get_bucket_counts( + release="Ubuntu 24.04", + period="today" +) + +print(f"Found {len(result)} buckets") +for bucket, count in result[:30]: + print(f"Bucket: {bucket}, Count: {count}") +# Example: Get bucket counts for other periods and with a package filter + +print("Past week") +result = get_bucket_counts( + period="week" +) + +print(f"Found {len(result)} buckets") +for bucket, count in result[:30]: + print(f"Bucket: {bucket}, Count: {count}") + +print("Past month") +result = get_bucket_counts( + period="month" +) + +print(f"Found {len(result)} buckets") +for bucket, count in result[:30]: + print(f"Bucket: {bucket}, Count: {count}") + +print("Nautilus package - today") +result = get_bucket_counts( + period="today", + package="nautilus", +) + +print(f"Found {len(result)} buckets") +for bucket, count in result[:30]: + print(f"Bucket: {bucket}, Count: {count}") diff --git a/examples/cassie_functions/get_crash.py b/examples/cassie_functions/get_crash.py new file mode 100644 index 0000000..e027e0b --- /dev/null +++ b/examples/cassie_functions/get_crash.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +"""Example usage of get_crash function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_crash + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get crash details +oopsid = "e3855456-cecb-11f0-b91f-fa163ec44ecd" +columns = ["Package", "StacktraceAddressSignature"] + +crash_data = get_crash(oopsid, columns=columns) +print(f"Crash data: {crash_data}") diff --git a/examples/cassie_functions/get_crash_count.py b/examples/cassie_functions/get_crash_count.py new file mode 100644 index 0000000..2ba8db9 --- /dev/null +++ b/examples/cassie_functions/get_crash_count.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +"""Example usage of get_crash_count function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_crash_count + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get crash count for Ubuntu 24.04 +start = 3 +finish = 10 +release = "Ubuntu 24.04" + +for date, count in get_crash_count(start, finish, release=release): + print(f"Date: {date}, Release: {release}, Crashes: {count}") + +for date, count in get_crash_count(start, finish): + print(f"Date: {date}, Crashes: {count}") diff --git a/examples/cassie_functions/get_crashes_for_bucket.py b/examples/cassie_functions/get_crashes_for_bucket.py new file mode 100644 index 0000000..6d86dc7 --- /dev/null +++ b/examples/cassie_functions/get_crashes_for_bucket.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +"""Example usage of get_crashes_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_crashes_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get crashes for a specific bucket +bucketid = "/bin/zsh:11:makezleparams:execzlefunc:redrawhook:zlecore:zleread" +limit = 10 + +crashes = get_crashes_for_bucket(bucketid, limit=limit) 
+print(f"Found {len(crashes)} crashes") +for crash in crashes: + print(f"Crash ID: {crash}") + +start_uuid = "cbb0a4b6-d120-11f0-a9ed-fa163ec8ca8c" +crashes = get_crashes_for_bucket(bucketid, limit=limit, start=start_uuid) +print(f"Found {len(crashes)} crashes (started at {start_uuid})") +for crash in crashes: + print(f"Crash ID: {crash}") diff --git a/examples/cassie_functions/get_metadata_for_bucket.py b/examples/cassie_functions/get_metadata_for_bucket.py new file mode 100644 index 0000000..15c94bd --- /dev/null +++ b/examples/cassie_functions/get_metadata_for_bucket.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +"""Example usage of get_metadata_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_metadata_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get metadata for a specific bucket +bucketid = "/bin/zsh:11:makezleparams:execzlefunc:redrawhook:zlecore:zleread" +release = "Ubuntu 24.04" + +metadata = get_metadata_for_bucket(bucketid, release=release) +print(f"Metadata: {metadata}") diff --git a/examples/cassie_functions/get_metadata_for_buckets.py b/examples/cassie_functions/get_metadata_for_buckets.py new file mode 100644 index 0000000..0ea89b8 --- /dev/null +++ b/examples/cassie_functions/get_metadata_for_buckets.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +"""Example usage of get_metadata_for_buckets function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_metadata_for_buckets + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get metadata for multiple buckets +bucketids = ["bucket_1", "bucket_2", "bucket_3"] +release = "Ubuntu 24.04" + +metadata_dict = get_metadata_for_buckets(bucketids, release=release) +for bucketid, metadata in metadata_dict.items(): + print(f"Bucket {bucketid}: {metadata}") diff --git a/examples/cassie_functions/get_package_crash_rate.py b/examples/cassie_functions/get_package_crash_rate.py new file mode 100644 index 0000000..c654eea --- /dev/null +++ b/examples/cassie_functions/get_package_crash_rate.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +"""Example usage of get_package_crash_rate function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_package_crash_rate + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get crash rate for a package update +release = "Ubuntu 24.04" +src_package = "firefox" +old_version = "120.0" +new_version = "121.0" +pup = 100 # Phased update percentage +date = "20231115" +absolute_uri = "https://errors.ubuntu.com" + +result = get_package_crash_rate( + release, src_package, old_version, new_version, + pup, date, absolute_uri, exclude_proposed=False +) +print(f"Crash rate analysis: {result}") diff --git a/examples/cassie_functions/get_package_for_bucket.py b/examples/cassie_functions/get_package_for_bucket.py new file mode 100644 index 0000000..53e96a5 --- /dev/null +++ b/examples/cassie_functions/get_package_for_bucket.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +"""Example usage of get_package_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_package_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get package information for a bucket +bucketid = "example_bucket_id_12345" + 
+package, version = get_package_for_bucket(bucketid) +print(f"Package: {package}") +print(f"Version: {version}") diff --git a/examples/cassie_functions/get_package_new_buckets.py b/examples/cassie_functions/get_package_new_buckets.py new file mode 100644 index 0000000..c99fbf5 --- /dev/null +++ b/examples/cassie_functions/get_package_new_buckets.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +"""Example usage of get_package_new_buckets function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_package_new_buckets + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get new buckets for a package version +src_pkg = "firefox" +previous_version = "120.0" +new_version = "121.0" + +new_buckets = get_package_new_buckets(src_pkg, previous_version, new_version) +print(f"Found {len(new_buckets)} new buckets") +for bucket in new_buckets[:5]: + print(f"Bucket: {bucket}") diff --git a/examples/cassie_functions/get_problem_for_hash.py b/examples/cassie_functions/get_problem_for_hash.py new file mode 100644 index 0000000..124c1fb --- /dev/null +++ b/examples/cassie_functions/get_problem_for_hash.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +"""Example usage of get_problem_for_hash function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_problem_for_hash + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get problem bucket for a hash +hashed = "3f322b0f41718376ceefaf12fe3c69c046b6f643" + +problem = get_problem_for_hash(hashed) +if problem: + print(f"Problem bucket: {problem}") +else: + print("No problem found for hash") diff --git a/examples/cassie_functions/get_retrace_failure_for_bucket.py b/examples/cassie_functions/get_retrace_failure_for_bucket.py new file mode 100644 index 0000000..48ccac8 --- /dev/null +++ b/examples/cassie_functions/get_retrace_failure_for_bucket.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +"""Example usage of get_retrace_failure_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_retrace_failure_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get retrace failure information +bucketid = "example_bucket_id_12345" + +failure_data = get_retrace_failure_for_bucket(bucketid) +print(f"Retrace failure data: {failure_data}") diff --git a/examples/cassie_functions/get_retracer_count.py b/examples/cassie_functions/get_retracer_count.py new file mode 100644 index 0000000..278325d --- /dev/null +++ b/examples/cassie_functions/get_retracer_count.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +"""Example usage of get_retracer_count function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_retracer_count + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get retracer count for a specific date +date = "20231115" + +count_data = get_retracer_count(date) +print(f"Retracer count data: {count_data}") diff --git a/examples/cassie_functions/get_retracer_counts.py b/examples/cassie_functions/get_retracer_counts.py new file mode 100644 index 0000000..8f50ecd --- /dev/null +++ b/examples/cassie_functions/get_retracer_counts.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +"""Example usage of get_retracer_counts function.""" + +import sys +sys.path.insert(0, '../../src') + +from 
errortracker.cassandra import setup_cassandra +from errors.cassie import get_retracer_counts + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get retracer counts for a date range +start = 0 +finish = 7 + +for date, counts in get_retracer_counts(start, finish): + print(f"Date: {date}") + print(f"Counts: {counts}") + break # Show first result only diff --git a/examples/cassie_functions/get_retracer_means.py b/examples/cassie_functions/get_retracer_means.py new file mode 100644 index 0000000..24e09c7 --- /dev/null +++ b/examples/cassie_functions/get_retracer_means.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +"""Example usage of get_retracer_means function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_retracer_means + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get retracer means for date range +start = 0 +finish = 7 + +for date, means in get_retracer_means(start, finish): + print(f"Date: {date}") + print(f"Means: {means}") + break # Show first result only diff --git a/examples/cassie_functions/get_signatures_for_bug.py b/examples/cassie_functions/get_signatures_for_bug.py new file mode 100644 index 0000000..e792137 --- /dev/null +++ b/examples/cassie_functions/get_signatures_for_bug.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +"""Example usage of get_signatures_for_bug function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_signatures_for_bug + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get crash signatures for a bug +bug = 123456 # Launchpad bug number + +signatures = get_signatures_for_bug(bug) +print(f"Found {len(signatures)} signatures") +for signature in signatures[:5]: + print(f"Signature: {signature}") diff --git a/examples/cassie_functions/get_source_package_for_bucket.py b/examples/cassie_functions/get_source_package_for_bucket.py new file mode 100644 index 0000000..06aa058 --- /dev/null +++ b/examples/cassie_functions/get_source_package_for_bucket.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +"""Example usage of get_source_package_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_source_package_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get source package for a bucket +bucketid = "example_bucket_id_12345" + +source_package = get_source_package_for_bucket(bucketid) +print(f"Source package: {source_package}") diff --git a/examples/cassie_functions/get_stacktrace_for_bucket.py b/examples/cassie_functions/get_stacktrace_for_bucket.py new file mode 100644 index 0000000..ae87d69 --- /dev/null +++ b/examples/cassie_functions/get_stacktrace_for_bucket.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +"""Example usage of get_stacktrace_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_stacktrace_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get stacktrace for a bucket +bucketid = "example_bucket_id_12345" + +stacktrace, thread_stacktrace = get_stacktrace_for_bucket(bucketid) +if stacktrace: + print(f"Stacktrace: {stacktrace[:200]}...") +if thread_stacktrace: + print(f"Thread Stacktrace: {thread_stacktrace[:200]}...") diff --git a/examples/cassie_functions/get_system_image_versions.py 
b/examples/cassie_functions/get_system_image_versions.py new file mode 100644 index 0000000..c8718e1 --- /dev/null +++ b/examples/cassie_functions/get_system_image_versions.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +"""Example usage of get_system_image_versions function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_system_image_versions + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get versions for a system image type +image_type = "ubuntu-touch" + +versions = get_system_image_versions(image_type) +if versions: + print(f"Found {len(versions)} versions") + for version in versions[:5]: + print(f"Version: {version}") +else: + print("No versions found") diff --git a/examples/cassie_functions/get_total_buckets_by_day.py b/examples/cassie_functions/get_total_buckets_by_day.py new file mode 100644 index 0000000..dff8b05 --- /dev/null +++ b/examples/cassie_functions/get_total_buckets_by_day.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +"""Example usage of get_total_buckets_by_day function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_total_buckets_by_day + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get bucket counts for the past 7 days +start = 0 +finish = 7 + +result = get_total_buckets_by_day(start, finish) +for date, count in result: + print(f"Date: {date}, Count: {count}") diff --git a/examples/cassie_functions/get_traceback_for_bucket.py b/examples/cassie_functions/get_traceback_for_bucket.py new file mode 100644 index 0000000..e6e529d --- /dev/null +++ b/examples/cassie_functions/get_traceback_for_bucket.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +"""Example usage of get_traceback_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_traceback_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get traceback for a bucket +bucketid = "example_bucket_id_12345" + +traceback = get_traceback_for_bucket(bucketid) +if traceback: + print(f"Traceback: {traceback[:200]}...") # Show first 200 chars +else: + print("No traceback found") diff --git a/examples/cassie_functions/get_user_crashes.py b/examples/cassie_functions/get_user_crashes.py new file mode 100644 index 0000000..6fbeeda --- /dev/null +++ b/examples/cassie_functions/get_user_crashes.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +"""Example usage of get_user_crashes function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import get_user_crashes + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get crashes for a specific user +user_token = "example_user_token_12345" +limit = 20 + +crashes = get_user_crashes(user_token, limit=limit) +print(f"Found {len(crashes)} user crashes") +for crash_id, timestamp in crashes[:5]: + print(f"Crash: {crash_id}, Timestamp: {timestamp}") diff --git a/examples/cassie_functions/get_versions_for_bucket.py b/examples/cassie_functions/get_versions_for_bucket.py new file mode 100644 index 0000000..9659427 --- /dev/null +++ b/examples/cassie_functions/get_versions_for_bucket.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +"""Example usage of get_versions_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from 
errors.cassie import get_versions_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Get versions for a bucket +bucketid = "example_bucket_id_12345" + +versions = get_versions_for_bucket(bucketid) +print(f"Versions: {versions}") +for version, count in list(versions.items())[:5]: + print(f"Version: {version}, Count: {count}") diff --git a/examples/cassie_functions/record_bug_for_bucket.py b/examples/cassie_functions/record_bug_for_bucket.py new file mode 100644 index 0000000..84eb736 --- /dev/null +++ b/examples/cassie_functions/record_bug_for_bucket.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +"""Example usage of record_bug_for_bucket function.""" + +import sys +sys.path.insert(0, '../../src') + +from errortracker.cassandra import setup_cassandra +from errors.cassie import record_bug_for_bucket + +# Setup Cassandra connection +setup_cassandra() + +# Example: Record a bug for a bucket +bucketid = "example_bucket_id_12345" +bug = 123456 # Launchpad bug number + +record_bug_for_bucket(bucketid, bug) +print(f"Recorded bug {bug} for bucket {bucketid}") diff --git a/src/daisy/submit.py b/src/daisy/submit.py index 4707d65..153f63a 100644 --- a/src/daisy/submit.py +++ b/src/daisy/submit.py @@ -36,22 +36,6 @@ logger = logging.getLogger("daisy") -def update_counters(release, src_package, date, src_version=None): - if src_version: - key = "%s:%s:%s" % (release, src_package, src_version) - else: - key = "%s:%s" % (release, src_package) - cassandra_schema.Counters(key=key.encode(), column1=date).update(value=1) - - -def update_proposed_counters(release, src_package, date, src_version=None): - if src_version: - key = "%s:%s:%s" % (release, src_package, src_version) - else: - key = "%s:%s" % (release, src_package) - cassandra_schema.CountersForProposed(key=key.encode(), column1=date).update(value=1) - - def create_minimal_report_from_bson(data): report = Report() for key in data: @@ -221,21 +205,6 @@ def submit(request, system_token): problem_type, release, package, version, pkg_arch ) - # generic counter for crashes about a source package which is used by the - # phased-updater and only includes official Ubuntu packages and not those - # crahses from systems under auto testing. - if not third_party and not automated_testing and problem_type == "Crash": - update_counters(release=release, src_package=src_package, date=day_key) - if version == "": - metrics.meter("missing.missing_package_version") - else: - update_counters( - release=release, - src_package=src_package, - src_version=version, - date=day_key, - ) - # ProcMaps is useful for creating a crash sig, not after that if "Traceback" in data and "ProcMaps" in data: data.pop("ProcMaps") @@ -262,18 +231,6 @@ def submit(request, system_token): package_from_proposed = False if "package-from-proposed" in tags: package_from_proposed = True - # generic counter for crashes about a source package which is used by - # the phased-updater and only includes official Ubuntu packages and - # not those from systems under auto testing. - if not third_party and not automated_testing and problem_type == "Crash": - update_proposed_counters(release=release, src_package=src_package, date=day_key) - if version != "": - update_proposed_counters( - release=release, - src_package=src_package, - src_version=version, - date=day_key, - ) # A device is manually blocklisted if it has repeatedly failed to have an # crash inserted into the OOPS table. 
diff --git a/src/errors/cassie.py b/src/errors/cassie.py index fccd9c0..cb4354f 100644 --- a/src/errors/cassie.py +++ b/src/errors/cassie.py @@ -1,21 +1,41 @@ import datetime import operator +import struct import sys import time import urllib.error import urllib.parse import urllib.request from functools import cmp_to_key +from uuid import UUID import numpy -# TODO: port that to the cassandra module -# import pycassa -# from pycassa.cassandra.ttypes import NotFoundException -# from pycassa.util import OrderedDict from errortracker import cassandra, config - -session = cassandra.cassandra_session() +from errortracker.cassandra_schema import ( + Bucket, + BucketMetadata, + BucketRetraceFailureReason, + BucketVersionsCount, + BucketVersionSystems2, + BugToCrashSignatures, + Counters, + CountersForProposed, + DayBucketsCount, + DoesNotExist, + Hashes, + Indexes, + OOPS, + RetraceStats, + SourceVersionBuckets, + Stacktrace, + SystemImages, + UniqueUsers90Days, + UserBinaryPackages, + UserOOPS, +) + +session = cassandra.cassandra_session def _split_into_dictionaries(original): @@ -27,15 +47,15 @@ def _split_into_dictionaries(original): return value -def _get_range_of_dates(start, finish): +def _get_range_of_dates(start_x_days_ago: int, finish_x_days_ago: int) -> list[str]: """Get a range of dates from start to finish. This is necessary because we use the Cassandra random partitioner, so lexicographical ranges are not possible.""" - finish = finish - start - date = datetime.datetime.utcnow() - datetime.timedelta(days=start) + finish_x_days_ago = finish_x_days_ago - start_x_days_ago + date = datetime.datetime.utcnow() - datetime.timedelta(days=start_x_days_ago) delta = datetime.timedelta(days=1) dates = [] - for i in range(finish): + for i in range(finish_x_days_ago): dates.append(date.strftime("%Y%m%d")) date = date - delta return dates @@ -43,26 +63,26 @@ def _get_range_of_dates(start, finish): def get_oopses_by_day(date, limit=1000): """All of the OOPSes in the given day.""" - oopses_by_day = session.prepare('SELECT value FROM crashdb."DayOOPS" WHERE key = ? LIMIT ?;') - for row in session.execute(oopses_by_day, [date, limit]): + oopses_by_day = session().prepare('SELECT value FROM crashdb."DayOOPS" WHERE key = ? LIMIT ?;') + for row in session().execute(oopses_by_day, [date, limit]): yield row.value def get_oopses_by_release(release, limit=1000): """All of the OOPSes in the given release.""" - oopses_by_release = session.prepare( + oopses_by_release = session().prepare( 'SELECT column1 FROM crashdb."ErrorsByRelease" WHERE key = ? LIMIT ? 
ALLOW FILTERING;' ) - for row in session.execute(oopses_by_release, [release.encode(), limit]): + for row in session().execute(oopses_by_release, [release.encode(), limit]): yield row.column1 def get_total_buckets_by_day(start, finish): """All of the buckets added to for the past seven days.""" - daybucketscount_cf = pycassa.ColumnFamily(pool, "DayBucketsCount") dates = _get_range_of_dates(start, finish) for date in dates: - yield (date, daybucketscount_cf.get_count(date)) + count = DayBucketsCount.objects.filter(key=date.encode()).count() + yield (date, count) def _date_range_iterator(start, finish): @@ -93,7 +113,6 @@ def get_bucket_counts( """The number of times each bucket has been added to today, this month, or this year.""" - daybucketscount_cf = pycassa.ColumnFamily(pool, "DayBucketsCount") periods = "" if period: if period == "today" or period == "day": @@ -150,31 +169,25 @@ def get_bucket_counts( keys.append(key) results = {} - batch_size = 500 for key in keys: - start = "" - while True: - try: - result = daybucketscount_cf.get(key, column_start=start, column_count=batch_size) - except NotFoundException: - break - - for column, count in result.items(): + try: + rows = DayBucketsCount.objects.filter(key=key.encode()).all() + for row in rows: + column = row.column1 + count = row.value if not show_failed and column.startswith("failed"): continue - column = column.encode("utf-8") + if isinstance(column, str): + column = column.encode("utf-8") try: existing = results[column] except KeyError: existing = 0 results[column] = count + existing - # We do not want to include the end of the previous batch. - start = column + "0" - if len(result) < batch_size: - break - return sorted( - list(results.items()), key=cmp_to_key(lambda x, y: cmp(x[1], y[1])), reverse=True - ) + except DoesNotExist: + continue + + return sorted(list(results.items()), key=lambda x: x[1], reverse=True) def get_crashes_for_bucket(bucketid, limit=100, start=None): @@ -184,50 +197,61 @@ def get_crashes_for_bucket(bucketid, limit=100, start=None): We show the most recent crashes first, since they'll be the most relevant to the current state of the problem. """ - bucket_cf = pycassa.ColumnFamily(pool, "Bucket") try: + query = Bucket.objects.filter(key=bucketid).order_by("-column1") if start: - start = pycassa.util.uuid.UUID(start) - return list( - bucket_cf.get( - bucketid, column_start=start, column_count=limit, column_reversed=True - ).keys() - )[1:] - else: - return list(bucket_cf.get(bucketid, column_count=limit, column_reversed=True).keys()) - except NotFoundException: + start_uuid = UUID(start) + # Get items less than start (because of reversed ordering) + query = query.filter(column1__lt=start_uuid) + + return [row.column1 for row in list(query.limit(limit).all())] + except DoesNotExist: return [] def get_package_for_bucket(bucketid): """Returns the package and version for a given bucket.""" - bucket_cf = pycassa.ColumnFamily(pool, "Bucket") - oops_cf = pycassa.ColumnFamily(pool, "OOPS") # Grab 5 OOPS IDs, just in case the first one doesn't have a Package field. 
try: - oopsids = list(bucket_cf.get(bucketid, column_count=5).keys()) - except NotFoundException: + rows = Bucket.objects.filter(key=bucketid).limit(5).all() + oopsids = [row.column1 for row in rows] + except DoesNotExist: return ("", "") + for oopsid in oopsids: try: - oops = oops_cf.get(str(oopsid), columns=["Package"]) - package_and_version = oops["Package"].split()[:2] - if len(package_and_version) == 1: - return (package_and_version[0], "") - else: - return package_and_version - except (KeyError, NotFoundException): + oops_rows = OOPS.objects.filter(key=str(oopsid).encode(), column1="Package").all() + for row in oops_rows: + value = row.value + if isinstance(value, bytes): + value = value.decode("utf-8") + package_and_version = value.split()[:2] + if len(package_and_version) == 1: + return (package_and_version[0], "") + else: + return tuple(package_and_version) + except (KeyError, DoesNotExist): continue return ("", "") def get_crash(oopsid, columns=None): - oops_cf = pycassa.ColumnFamily(pool, "OOPS") try: - oops = oops_cf.get(oopsid, columns=columns) - except NotFoundException: + query = OOPS.objects.filter(key=oopsid.encode()) + if columns: + # Filter by specific columns + query = query.filter(column1__in=columns) + + oops = {} + for row in query.all(): + oops[row.column1] = row.value + + if not oops: + return {} + except DoesNotExist: return {} + if "StacktraceAddressSignature" in oops: SAS = oops["StacktraceAddressSignature"] if not SAS: @@ -239,49 +263,59 @@ def get_crash(oopsid, columns=None): return oops else: return oops + try: - indexes_cf = pycassa.ColumnFamily(pool, "Indexes") - idx = "crash_signature_for_stacktrace_address_signature" - bucket = indexes_cf.get(idx, [SAS]) - oops["SAS"] = bucket[SAS] + index_key = b"crash_signature_for_stacktrace_address_signature" + index_rows = Indexes.objects.filter(key=index_key, column1=SAS).all() + for row in index_rows: + oops["SAS"] = row.value.decode() if isinstance(row.value, bytes) else row.value + break return oops - except NotFoundException: + except DoesNotExist: return oops - return oops def get_traceback_for_bucket(bucketid): - oops_cf = pycassa.ColumnFamily(pool, "OOPS") # TODO fetching a crash ID twice, once here and once in get_stacktrace, is # a bit rubbish, but we'll write the stacktrace into the bucket at some # point and get rid of the contents of both of these functions. - if len(get_crashes_for_bucket(bucketid, 1)) == 0: + crashes = get_crashes_for_bucket(bucketid, 1) + if len(crashes) == 0: return None - crash = str(get_crashes_for_bucket(bucketid, 1)[0]) + crash = str(crashes[0]) try: - return oops_cf.get(crash, columns=["Traceback"])["Traceback"] - except NotFoundException: + rows = OOPS.objects.filter(key=crash.encode(), column1="Traceback").all() + for row in rows: + return row.value + return None + except DoesNotExist: return None def get_stacktrace_for_bucket(bucketid): - stacktrace_cf = pycassa.ColumnFamily(pool, "Stacktrace") - oops_cf = pycassa.ColumnFamily(pool, "OOPS") # TODO: we should build some sort of index for this. 
SAS = "StacktraceAddressSignature" cols = ["Stacktrace", "ThreadStacktrace"] for crash in get_crashes_for_bucket(bucketid, 10): sas = None try: - sas = oops_cf.get(str(crash), columns=[SAS])[SAS] - except NotFoundException: + rows = OOPS.objects.filter(key=str(crash).encode(), column1=SAS).all() + for row in rows: + sas = row.value + break + except DoesNotExist: pass if not sas: continue try: - traces = stacktrace_cf.get(sas, columns=cols) + traces = {} + sas_key = sas.encode() if isinstance(sas, str) else sas + for col in cols: + trace_rows = Stacktrace.objects.filter(key=sas_key, column1=col).all() + for row in trace_rows: + traces[col] = row.value return (traces.get("Stacktrace", None), traces.get("ThreadStacktrace", None)) - except NotFoundException: + except DoesNotExist: pass # We didn't have a stack trace for any of the signatures in this set of # crashes. @@ -292,44 +326,56 @@ def get_stacktrace_for_bucket(bucketid): def get_retracer_count(date): - retracestats_cf = pycassa.ColumnFamily(pool, "RetraceStats") - result = retracestats_cf.get(date) - return _split_into_dictionaries(result) + try: + result = RetraceStats.get_as_dict(key=date.encode() if isinstance(date, str) else date) + return _split_into_dictionaries(result) + except DoesNotExist: + return {} def get_retracer_counts(start, finish): - retracestats_cf = pycassa.ColumnFamily(pool, "RetraceStats") if finish == sys.maxsize: - start = datetime.date.today() - datetime.timedelta(days=start) - start = start.strftime("%Y%m%d") - results = retracestats_cf.get_range() - return ( - (date, _split_into_dictionaries(result)) for date, result in results if date < start - ) + start_date = datetime.date.today() - datetime.timedelta(days=start) + start_str = start_date.strftime("%Y%m%d") + # Get all dates from RetraceStats + all_rows = RetraceStats.objects.all() + results_dict = {} + for row in all_rows: + date_key = row.key.decode() if isinstance(row.key, bytes) else row.key + if date_key < start_str: + if date_key not in results_dict: + results_dict[date_key] = {} + results_dict[date_key][row.column1] = row.value + return ((date, _split_into_dictionaries(result)) for date, result in results_dict.items()) else: dates = _get_range_of_dates(start, finish) - results = retracestats_cf.multiget(dates) + results = {} + for date in dates: + try: + result = RetraceStats.get_as_dict(key=date.encode()) + results[date] = result + except DoesNotExist: + pass return ((date, _split_into_dictionaries(results[date])) for date in results) def get_retracer_means(start, finish): - indexes_cf = pycassa.ColumnFamily(pool, "Indexes") - start = datetime.date.today() - datetime.timedelta(days=start) - start = start.strftime("%Y%m%d") - finish = datetime.date.today() - datetime.timedelta(days=finish) - finish = finish.strftime("%Y%m%d") + start_date = datetime.date.today() - datetime.timedelta(days=start) + start_str = start_date.strftime("%Y%m%d") + finish_date = datetime.date.today() - datetime.timedelta(days=finish) + finish_str = finish_date.strftime("%Y%m%d") # FIXME: We shouldn't be specifying a maximum number of columns - timings = indexes_cf.get( - "mean_retracing_time", - column_start=start, - column_finish=finish, - column_count=1000, - column_reversed=True, - ) - to_float = pycassa.marshal.unpacker_for("FloatType") - result = OrderedDict() + try: + timings = Indexes.get_as_dict(key=b"mean_retracing_time") + except DoesNotExist: + return iter([]) + + result = dict() for timing in timings: + # Filter by date range + if timing < start_str or timing> 
finish_str: + continue if not timing.endswith(":count"): branch = result parts = timing.split(":") @@ -342,14 +388,13 @@ def get_retracer_means(start, finish): end = parts[-1] for part in parts: if part is end: - branch[part] = to_float(timings[timing]) + branch[part] = timings[timing] else: branch = branch.setdefault(part, {}) return iter(result.items()) def get_crash_count(start, finish, release=None): - counters_cf = pycassa.ColumnFamily(pool, "Counters") dates = _get_range_of_dates(start, finish) for date in dates: try: @@ -357,26 +402,38 @@ def get_crash_count(start, finish, release=None): key = "oopses:%s" % release else: key = "oopses" - oopses = int(counters_cf.get(key, columns=[date])[date]) - yield (date, oopses) - except NotFoundException: + rows = Counters.objects.filter(key=key.encode(), column1=date).all() + for row in rows: + oopses = int(row.value) + yield (date, oopses) + break + except DoesNotExist: pass -def get_metadata_for_bucket(bucketid, release=None): - bucketmetadata_cf = pycassa.ColumnFamily(pool, "BucketMetadata") +def get_metadata_for_bucket(bucketid: str, release: str = None): try: if not release: - return bucketmetadata_cf.get(bucketid, column_finish="~") + # Get all columns up to "~" (non-inclusive) + rows = BucketMetadata.objects.filter(key=bucketid.encode(), column1__lt="~").all() else: - ret = bucketmetadata_cf.get(bucketid) + rows = BucketMetadata.objects.filter(key=bucketid.encode()).all() + + ret = {} + for row in rows: + ret[row.column1] = row.value + + if release and ret: try: ret["FirstSeen"] = ret["~%s:FirstSeen" % release] + except KeyError: + pass + try: ret["LastSeen"] = ret["~%s:LastSeen" % release] except KeyError: pass - return ret - except NotFoundException: + return ret + except DoesNotExist: return {} @@ -388,66 +445,57 @@ def chunks(l, n): def get_metadata_for_buckets(bucketids, release=None): - bucketmetadata_cf = pycassa.ColumnFamily(pool, "BucketMetadata") - ret = OrderedDict() - for buckets in chunks(bucketids, 5): - if not release: - ret.update(bucketmetadata_cf.multiget(buckets, column_finish="~")) - else: - ret.update(bucketmetadata_cf.multiget(buckets)) - if release: - for bucket in ret: - bucket = ret[bucket] - try: - bucket["FirstSeen"] = bucket["~%s:FirstSeen" % release] - bucket["LastSeen"] = bucket["~%s:LastSeen" % release] - except KeyError: - # Rather than confuse developers with half release-specific - # data. Of course this will only apply for the current row, so - # it's possible subsequent rows will show release-specific - # data. 
- if "FirstSeen" in bucket: - del bucket["FirstSeen"] - if "LastSeen" in bucket: - del bucket["LastSeen"] + ret = dict() + for bucketid in bucketids: + ret[bucketid] = get_metadata_for_bucket(bucketid, release) return ret def get_user_crashes(user_token, limit=50, start=None): - useroops_cf = pycassa.ColumnFamily(pool, "UserOOPS") results = {} try: + user_key = user_token.encode() if isinstance(user_token, str) else user_token + query = UserOOPS.objects.filter(key=user_key) + if start: - start = pycassa.util.uuid.UUID(start) - result = useroops_cf.get( - user_token, column_start=start, column_count=limit, include_timestamp=True - ) - else: - result = useroops_cf.get(user_token, column_count=limit, include_timestamp=True) - for r in result: - results[r] = {"submitted": result[r]} - start = list(result.keys())[-1] + "0" - except NotFoundException: + # Filter to get items greater than start + query = query.filter(column1__gt=start) + + rows = list(query.limit(limit).all()) + + for row in rows: + # Since we don't have timestamp directly, we'll use the column1 as a proxy + results[row.column1] = {"submitted": row.column1} + except DoesNotExist: return [] + return [ - (k[0], k[1]) - for k in sorted(iter(results.items()), key=operator.itemgetter(1), reverse=True) + (k, results[k]["submitted"]) + for k in sorted(results.keys(), key=lambda x: results[x]["submitted"], reverse=True) ] def get_average_crashes(field, release, days=7): - uniqueusers_cf = pycassa.ColumnFamily(pool, "UniqueUsers90Days") - counters_cf = pycassa.ColumnFamily(pool, "Counters") dates = _get_range_of_dates(0, days) start = dates[-1] end = dates[0] + try: key = "oopses:%s" % field - g = counters_cf.xget(key, column_start=start, column_finish=end) - oopses = pycassa.util.OrderedDict(x for x in g) - g = uniqueusers_cf.xget(release, column_start=start, column_finish=end) - users = pycassa.util.OrderedDict(x for x in g) - except NotFoundException: + oopses = dict() + oops_rows = Counters.objects.filter( + key=key.encode(), column1__gte=start, column1__lte=end + ).all() + for row in oops_rows: + oopses[row.column1] = row.value + + users = dict() + user_rows = UniqueUsers90Days.objects.filter( + key=release, column1__gte=start, column1__lte=end + ).all() + for row in user_rows: + users[row.column1] = row.value + except DoesNotExist: return [] return_data = [] @@ -462,8 +510,6 @@ def get_average_crashes(field, release, days=7): def get_average_instances(bucketid, release, days=7): - uniqueusers_cf = pycassa.ColumnFamily(pool, "UniqueUsers90Days") - daybucketscount_cf = pycassa.ColumnFamily(pool, "DayBucketsCount") # FIXME Why oh why did we do things this way around? It makes it impossible # to do a quick range scan. 
We should create DayBucketsCount2, replacing # this with a CF that's keyed on the bucket ID and has counter columns @@ -471,12 +517,23 @@ def get_average_instances(bucketid, release, days=7): dates = _get_range_of_dates(0, days) start = dates[-1] end = dates[0] - gen = uniqueusers_cf.xget(release, column_start=start, column_finish=end) - users = dict(x for x in gen) + + user_rows = UniqueUsers90Days.objects.filter( + key=release, column1__gte=start, column1__lte=end + ).all() + users = {row.column1: row.value for row in user_rows} + for date in dates: try: - count = daybucketscount_cf.get("%s:%s" % (release, date), columns=[bucketid])[bucketid] - except NotFoundException: + key = "%s:%s" % (release, date) + count_rows = DayBucketsCount.objects.filter(key=key.encode(), column1=bucketid).all() + count = None + for row in count_rows: + count = row.value + break + if count is None: + continue + except DoesNotExist: continue try: avg = float(count) / float(users[date]) @@ -490,54 +547,67 @@ def get_versions_for_bucket(bucketid): """Get the dictionary of (release, version) tuples for the given bucket with values of their instance counts. If the bucket does not exist, return an empty dict.""" - bv_count_cf = pycassa.ColumnFamily(pool, "BucketVersionsCount") try: - return bv_count_cf.get(bucketid) - except NotFoundException: + bucket_key = bucketid.encode() if isinstance(bucketid, str) else bucketid + rows = BucketVersionsCount.objects.filter(key=bucket_key).all() + result = {} + for row in rows: + result[row.column1] = row.value + return result + except DoesNotExist: return {} def get_source_package_for_bucket(bucketid): - oops_cf = pycassa.ColumnFamily(pool, "OOPS") - bucket_cf = pycassa.ColumnFamily(pool, "Bucket") - oopsids = list(bucket_cf.get(bucketid, column_count=10).keys()) + bucket_rows = Bucket.objects.filter(key=bucketid).limit(10).all() + oopsids = [row.column1 for row in bucket_rows] for oopsid in oopsids: try: - oops = oops_cf.get(str(oopsid), columns=["SourcePackage"]) - return oops["SourcePackage"] - except (KeyError, NotFoundException): + oops_rows = OOPS.objects.filter( + key=str(oopsid).encode(), column1="SourcePackage" + ).all() + for row in oops_rows: + return row.value + except (KeyError, DoesNotExist): continue return "" def get_retrace_failure_for_bucket(bucketid): - bucketretracefail_fam = pycassa.ColumnFamily(pool, "BucketRetraceFailureReason") try: - failuredata = bucketretracefail_fam.get(bucketid) + failuredata = BucketRetraceFailureReason.get_as_dict( + key=bucketid.encode() if isinstance(bucketid, str) else bucketid + ) return failuredata - except NotFoundException: + except DoesNotExist: return {} def get_binary_packages_for_user(user): # query DayBucketsCount to ensure the package has crashes reported about # it rather than returning packages for which there will be no data. - daybucketscount_cf = pycassa.ColumnFamily(pool, "DayBucketsCount") - userbinpkgs_cf = pycassa.ColumnFamily(pool, "UserBinaryPackages") # if a package's last crash was reported more than a month ago then it # won't be returned here, however the package isn't likely to appear in # the most-common-problems. + # XXX: that 30 days delta + %Y%m doesn't seem to produce a nice sliding + # time window. Is this expected? 
apparently yes, but that seems a bit wrong period = (datetime.date.today() - datetime.timedelta(30)).strftime("%Y%m") try: - binary_packages = [pkg[0] + ":%s" % period for pkg in userbinpkgs_cf.xget(user)] - except NotFoundException: + pkg_rows = UserBinaryPackages.objects.filter(key=user).all() + binary_packages = [row.column1 + ":%s" % period for row in pkg_rows] + except DoesNotExist: return None if len(binary_packages) == 0: return None - results = daybucketscount_cf.multiget_count(binary_packages, max_count=1) - for result in results: - if results[result] == 0: - del results[result] + + results = {} + for pkg in binary_packages: + count = DayBucketsCount.objects.filter(key=pkg.encode()).limit(1).count() + # remove packages that don't have recent crashes + if count > 0: + results[pkg] = count + + # trim the date suffix to only keep the package name return [k[0:-7] for k in list(results.keys())] @@ -546,43 +616,66 @@ def get_package_crash_rate( ): """Find the rate of Crashes, not other problems, about a package.""" - counters_cf = pycassa.ColumnFamily(pool, "Counters") - proposed_counters_cf = pycassa.ColumnFamily(pool, "CountersForProposed") # the generic counter only includes Crashes for packages from official # Ubuntu sources and from systems not under auto testing - old_vers_column = "%s:%s:%s" % (release, src_package, old_version) - new_vers_column = "%s:%s:%s" % (release, src_package, new_version) + old_vers_column = "oopses:Crash:%s:%s:%s" % (release, src_package, old_version) + new_vers_column = "oopses:Crash:%s:%s:%s" % (release, src_package, new_version) results = {} + try: - # The first thing done is the reversing of the order that's why it - # is column_start - old_vers_data = counters_cf.get( - old_vers_column, column_start=date, column_reversed=True, column_count=15 + old_rows = ( + Counters.objects.filter(key=old_vers_column.encode(), column1__lte=date) + .order_by("-column1") + .limit(15) + .all() ) - except NotFoundException: + old_vers_data = {row.column1: row.value for row in old_rows} + except DoesNotExist: old_vers_data = None + try: # this may be unnecessarily long since updates phase in ~3 days - new_vers_data = counters_cf.get(new_vers_column, column_reversed=True, column_count=15) - except NotFoundException: + new_rows = ( + Counters.objects.filter(key=new_vers_column.encode()) + .order_by("-column1") + .limit(15) + .all() + ) + new_vers_data = {row.column1: row.value for row in new_rows} + except DoesNotExist: results["increase"] = False return results + + if not new_vers_data: + results["increase"] = False + return results + if exclude_proposed: try: - # The first thing done is the reversing of the order that's why it - # is column_start - proposed_old_vers_data = proposed_counters_cf.get( - old_vers_column, column_start=date, column_reversed=True, column_count=15 + proposed_old_rows = ( + CountersForProposed.objects.filter(key=old_vers_column.encode(), column1__lte=date) + .order_by("-column1") + .limit(15) + .all() ) - except NotFoundException: + proposed_old_vers_data = {row.column1: row.value for row in proposed_old_rows} + except DoesNotExist: + proposed_old_vers_data = None try: - # this may be unnecessarily long since updates phase in ~3 days - proposed_new_vers_data = proposed_counters_cf.get( - new_vers_column, column_reversed=True, column_count=15 + proposed_new_rows = ( + CountersForProposed.objects.filter(key=new_vers_column.encode()) +
.order_by("-column1") + .limit(15) + .all() ) - except NotFoundException: + proposed_new_vers_data = {row.column1: row.value for row in proposed_new_rows} + except DoesNotExist: proposed_new_vers_data = None + today = datetime.datetime.utcnow().strftime("%Y%m%d") try: today_crashes = new_vers_data[today] @@ -590,6 +683,7 @@ def get_package_crash_rate( # no crashes today so not an increase results["increase"] = False return results + # subtract CountersForProposed data from today crashes if exclude_proposed and proposed_new_vers_data: try: @@ -601,6 +695,7 @@ def get_package_crash_rate( # no crashes today so not an increase results["increase"] = False return results + if new_vers_data and not old_vers_data: results["increase"] = True results["previous_average"] = None @@ -613,6 +708,7 @@ def get_package_crash_rate( ) results["web_link"] = absolute_uri + web_link return results + first_date = date oldest_date = list(old_vers_data.keys())[-1] dates = [x for x in _date_range_iterator(oldest_date, first_date)] @@ -633,10 +729,12 @@ def get_package_crash_rate( # the day doesn't exist so there were 0 errors except KeyError: previous_vers_crashes.append(0) + results["increase"] = False # 2 crashes may be a fluke if today_crashes < 3: return results + now = datetime.datetime.utcnow() hour = float(now.hour) minute = float(now.minute) @@ -669,32 +767,60 @@ def get_package_crash_rate( def get_package_new_buckets(src_pkg, previous_version, new_version): - srcversionbuckets_cf = pycassa.ColumnFamily(pool, "SourceVersionBuckets") - bucketversionsystems_cf = pycassa.ColumnFamily(pool, "BucketVersionSystems2") results = [] + + # Ensure src_pkg and versions are strings for Ascii fields + src_pkg_str = src_pkg if isinstance(src_pkg, str) else src_pkg.decode("utf-8") + new_version_str = new_version if isinstance(new_version, str) else new_version.decode("utf-8") + previous_version_str = ( + previous_version if isinstance(previous_version, str) else previous_version.decode("utf-8") + ) + # new version has no buckets try: - n_data = [bucket[0] for bucket in srcversionbuckets_cf.xget((src_pkg, new_version))] - except KeyError: + new_rows = SourceVersionBuckets.objects.filter(key=src_pkg_str, key2=new_version_str).all() + n_data = [row.column1 for row in new_rows] + except (KeyError, DoesNotExist): return results + # if previous version has no buckets return an empty list try: - p_data = [bucket[0] for bucket in srcversionbuckets_cf.xget((src_pkg, previous_version))] - except KeyError: + prev_rows = SourceVersionBuckets.objects.filter( + key=src_pkg_str, key2=previous_version_str + ).all() + p_data = [row.column1 for row in prev_rows] + except (KeyError, DoesNotExist): p_data = [] new_buckets = set(n_data).difference(set(p_data)) for bucket in new_buckets: - if isinstance(bucket, str): - bucket = bucket.encode("utf-8") # do not return buckets that failed to retrace - if bucket.startswith("failed:"): + bucket_str = ( + bucket + if isinstance(bucket, str) + else bucket.decode("utf-8") + if isinstance(bucket, bytes) + else str(bucket) + ) + if bucket_str.startswith("failed:"): continue - if isinstance(new_version, str): - new_version = new_version.encode("utf-8") + + # BucketVersionSystems2 expects key as Text (string) + bucket_key = ( + bucket + if isinstance(bucket, str) + else bucket.decode("utf-8") + if isinstance(bucket, bytes) + else str(bucket) + ) try: - count = len(bucketversionsystems_cf.get((bucket, new_version), column_count=4)) - except NotFoundException: + count_rows = ( + 
BucketVersionSystems2.objects.filter(key=bucket_key, key2=new_version_str) + .limit(4) + .all() + ) + count = len(list(count_rows)) + except DoesNotExist: + continue if count <= 2: continue @@ -703,51 +829,63 @@ def record_bug_for_bucket(bucketid, bug): - bucketmetadata_cf = pycassa.ColumnFamily(pool, "BucketMetadata") - bugtocrashsignatures_cf = pycassa.ColumnFamily(pool, "BugToCrashSignatures") # We don't insert bugs into the database if we're using Launchpad staging, # as those will disappear in Launchpad but our copy would persist. if config.lp_use_staging == "False": - bucketmetadata_cf.insert(bucketid, {"CreatedBug": bug}) - bugtocrashsignatures_cf.insert(int(bug), {bucketid: ""}) + # Prepare keys with proper encoding + bucket_key = bucketid.encode() if isinstance(bucketid, str) else bucketid + bug_key = str(int(bug)).encode() + + # BugToCrashSignatures expects column1 as Text (string) + bucketid_str = bucketid if isinstance(bucketid, str) else bucketid.decode("utf-8") + + # Insert into BucketMetadata + BucketMetadata.create(key=bucket_key, column1="CreatedBug", value=bug) + + # Insert into BugToCrashSignatures + BugToCrashSignatures.create(key=bug_key, column1=bucketid_str, value=b"") def get_signatures_for_bug(bug): try: - bug = int(bug) + bug_int = int(bug) except ValueError: return [] - bugtocrashsignatures_cf = pycassa.ColumnFamily(pool, "BugToCrashSignatures") try: - gen = bugtocrashsignatures_cf.xget(bug) - crashes = [crash for crash, unused in gen] + bug_key = str(bug_int).encode() + rows = BugToCrashSignatures.objects.filter(key=bug_key).all() + crashes = [row.column1 for row in rows] return crashes - except NotFoundException: + except DoesNotExist: return [] def bucket_exists(bucketid): - bucket_cf = pycassa.ColumnFamily(pool, "Bucket") try: - bucket_cf.get(bucketid, column_count=1) - return True - except NotFoundException: + count = Bucket.objects.filter(key=bucketid).limit(1).count() + return count > 0 + except DoesNotExist: return False def get_problem_for_hash(hashed): - hashes_cf = pycassa.ColumnFamily(pool, "Hashes") try: - return hashes_cf.get("bucket_%s" % hashed[0], columns=[hashed])[hashed] - except NotFoundException: + key = ("bucket_%s" % hashed[0]).encode() + hash_key = hashed.encode() if isinstance(hashed, str) else hashed + rows = Hashes.objects.filter(key=key, column1=hash_key).all() + for row in rows: + return row.value + return None + except DoesNotExist: return None def get_system_image_versions(image_type): - images_cf = pycassa.ColumnFamily(pool, "SystemImages") try: - versions = [version[0] for version in images_cf.xget(image_type)] + image_key = image_type.encode() if isinstance(image_type, str) else image_type + rows = SystemImages.objects.filter(key=image_key).all() + versions = [row.column1 for row in rows] return versions - except NotFoundException: + except DoesNotExist: return None diff --git a/src/errortracker/cassandra_schema.py b/src/errortracker/cassandra_schema.py index 4a28b86..d3ed1e4 100644 --- a/src/errortracker/cassandra_schema.py +++ b/src/errortracker/cassandra_schema.py @@ -13,8 +13,15 @@ class ErrorTrackerTable(models.Model): class Counters(ErrorTrackerTable): __table_name__ = "Counters" + # the index we count + # - Ubuntu 24.04:zsh:5.9-6ubuntu2 + # - Ubuntu 24.04:zsh key = columns.Blob(db_field="key", primary_key=True) + # a datestamp + # - 20251101 + # - 20240612 column1 = columns.Text(db_field="column1", primary_key=True) + # the count of crashes for that 
diff --git a/src/errortracker/cassandra_schema.py b/src/errortracker/cassandra_schema.py
index 4a28b86..d3ed1e4 100644
--- a/src/errortracker/cassandra_schema.py
+++ b/src/errortracker/cassandra_schema.py
@@ -13,8 +13,15 @@ class ErrorTrackerTable(models.Model):

 class Counters(ErrorTrackerTable):
     __table_name__ = "Counters"
+    # the index we count
+    # - Ubuntu 24.04:zsh:5.9-6ubuntu2
+    # - Ubuntu 24.04:zsh
     key = columns.Blob(db_field="key", primary_key=True)
+    # a datestamp
+    # - 20251101
+    # - 20240612
     column1 = columns.Text(db_field="column1", primary_key=True)
+    # the count of crashes for that release:package[:version] on that day
     value = columns.Counter(db_field="value")
@@ -31,8 +38,9 @@ class Indexes(ErrorTrackerTable):
     column1 = columns.Text(db_field="column1", primary_key=True)
     value = columns.Blob(db_field="value")

-    def get_as_dict(*args, **kwargs) -> dict:
-        query = Indexes.objects.filter(*args, **kwargs)
+    @classmethod
+    def get_as_dict(cls, *args, **kwargs) -> dict:
+        query = cls.objects.filter(*args, **kwargs)
         d = {}
         for result in query:
             # XXX: cassandra should be able to deserialize more properly by itself
@@ -43,7 +51,7 @@ def get_as_dict(*args, **kwargs) -> dict:
             else:
                 d[result.column1] = result.value
         if not d:
-            raise Indexes.DoesNotExist
+            raise cls.DoesNotExist
         return d

@@ -81,11 +89,12 @@ class OOPS(ErrorTrackerTable):
     column1 = columns.Text(db_field="column1", primary_key=True)
     value = columns.Text(db_field="value")

-    def get_as_dict(*args, **kwargs) -> dict:
-        query = OOPS.objects.filter(*args, **kwargs)
+    @classmethod
+    def get_as_dict(cls, *args, **kwargs) -> dict:
+        query = cls.objects.filter(*args, **kwargs)
         d = {}
         for result in query:
-            d[result["column1"]] = result["value"]
+            d[result.column1] = result.value
         return d

@@ -105,15 +114,26 @@ class SystemOOPSHashes(ErrorTrackerTable):

 class BucketMetadata(ErrorTrackerTable):
     __table_name__ = "BucketMetadata"
+    # the bucket ID
+    # - /bin/zsh:11:makezleparams:execzlefunc:redrawhook:zlecore:zleread
     key = columns.Blob(db_field="key", primary_key=True)
+    # which metadata
+    # - FirstSeen (package version)
+    # - LastSeen (package version)
+    # - FirstSeenRelease (Ubuntu series)
+    # - ~Ubuntu 25.04:LastSeen (package version)
     column1 = columns.Text(db_field="column1", primary_key=True)
+    # the corresponding value for the metadata
+    # - 5.9-6ubuntu2 (package version)
+    # - Ubuntu 18.04 (Ubuntu series)
     value = columns.Text(db_field="value")

-    def get_as_dict(*args, **kwargs) -> dict:
-        query = BucketMetadata.objects.filter(*args, **kwargs)
+    @classmethod
+    def get_as_dict(cls, *args, **kwargs) -> dict:
+        query = cls.objects.filter(*args, **kwargs)
         d = {}
         for result in query:
-            d[result["column1"]] = result["value"]
+            d[result.column1] = result.value
         return d

@@ -130,11 +150,12 @@ class RetraceStats(ErrorTrackerTable):
     column1 = columns.Text(db_field="column1", primary_key=True)
     value = columns.Counter(db_field="value")

-    def get_as_dict(*args, **kwargs) -> dict:
-        query = RetraceStats.objects.filter(*args, **kwargs)
+    @classmethod
+    def get_as_dict(cls, *args, **kwargs) -> dict:
+        query = cls.objects.filter(*args, **kwargs)
         d = {}
         for result in query:
-            d[result["column1"]] = result["value"]
+            d[result.column1] = result.value
         return d

@@ -155,8 +176,17 @@ class DayBuckets(ErrorTrackerTable):

 class DayBucketsCount(ErrorTrackerTable):
     __table_name__ = "DayBucketsCount"
+    # the index we count
+    # - Ubuntu 24.04:20251201
+    # - zsh:amd64:20251201
+    # - Crash:zsh:amd64:20251201 (unclear how this differs from the previous form)
+    # - package:tvtime:(not installed)\nSetting up tvtime (1.0.11-8build2) ...\ndpkg: error processing package tvtime (--configure):\n installed tvtime package post-installation script subprocess returned error exit status 1\n
     key = columns.Blob(db_field="key", primary_key=True)
+    # the bucket IDs we count, e.g.
+    # - /bin/zsh:11:__GI__IO_flush_all:_IO_cleanup:__run_exit_handlers:__GI_exit:zexit
+    # - /bin/brltty:*** buffer overflow detected ***: terminated
     column1 = columns.Text(db_field="column1", primary_key=True)
+    # the counter itself
     value = columns.Counter(db_field="value")
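The `get_as_dict` rewrite above is behavioral, not just stylistic: the old version was a plain function in the class body that hard-coded its own model, had to be copy-pasted into every table that wanted it, and would have passed the instance into `filter()` if ever called on one. As a `classmethod` it dispatches on whichever model it is called on. Hypothetical usage against the `BucketMetadata` model documented above (assumes a configured connection):

```python
bucketid = "/bin/zsh:11:makezleparams:execzlefunc:redrawhook:zlecore:zleread"

# the key is a Blob column, so encode the bucket ID before querying
metadata = BucketMetadata.get_as_dict(key=bucketid.encode())
first_seen = metadata.get("FirstSeen")  # e.g. "5.9-6ubuntu2"
```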
@@ -182,11 +212,12 @@ class BucketRetraceFailureReason(ErrorTrackerTable):
     column1 = columns.Text(db_field="column1", primary_key=True)
     value = columns.Text(db_field="value")

-    def get_as_dict(*args, **kwargs) -> dict:
-        query = BucketRetraceFailureReason.objects.filter(*args, **kwargs)
+    @classmethod
+    def get_as_dict(cls, *args, **kwargs) -> dict:
+        query = cls.objects.filter(*args, **kwargs)
         d = {}
         for result in query:
-            d[result["column1"]] = result["value"]
+            d[result.column1] = result.value
         return d

@@ -195,3 +226,59 @@ class AwaitingRetrace(ErrorTrackerTable):
     key = columns.Text(db_field="key", primary_key=True)
     column1 = columns.Text(db_field="column1", primary_key=True)
     value = columns.Text(db_field="value")
+
+
+class ErrorsByRelease(ErrorTrackerTable):
+    __table_name__ = "ErrorsByRelease"
+    key = columns.Ascii(db_field="key", primary_key=True)
+    key2 = columns.DateTime(db_field="key2", primary_key=True)
+    column1 = columns.TimeUUID(db_field="column1", primary_key=True)
+    value = columns.DateTime(db_field="value")
+
+
+class BucketVersionsCount(ErrorTrackerTable):
+    __table_name__ = "BucketVersionsCount"
+    key = columns.Text(db_field="key", primary_key=True)
+    column1 = columns.Ascii(db_field="column1", primary_key=True)
+    column2 = columns.Ascii(db_field="column2", primary_key=True)
+    value = columns.Counter(db_field="value")
+
+
+class BugToCrashSignatures(ErrorTrackerTable):
+    __table_name__ = "BugToCrashSignatures"
+    key = columns.VarInt(db_field="key", primary_key=True)
+    column1 = columns.Text(db_field="column1", primary_key=True)
+    value = columns.Blob(db_field="value")
+
+
+class SystemImages(ErrorTrackerTable):
+    __table_name__ = "SystemImages"
+    key = columns.Text(db_field="key", primary_key=True)
+    column1 = columns.Text(db_field="column1", primary_key=True)
+    value = columns.Blob(db_field="value")
+
+
+class UniqueUsers90Days(ErrorTrackerTable):
+    __table_name__ = "UniqueUsers90Days"
+    # Ubuntu series ("Ubuntu 26.04", "Ubuntu 25.10", etc...)
+    key = columns.Text(db_field="key", primary_key=True)
+    # a datestamp ("20251101", "20240612", etc...)
+    column1 = columns.Text(db_field="column1", primary_key=True)
+    # the count of unique users of that release that day
+    value = columns.BigInt(db_field="value")
+
+
+class UserBinaryPackages(ErrorTrackerTable):
+    __table_name__ = "UserBinaryPackages"
+    # a team that usually owns packages (like for MIR)
+    # - debcrafters-packages
+    # - foundations-bugs
+    # - xubuntu-bugs
+    key = columns.Ascii(db_field="key", primary_key=True)
+    # package names
+    # - abiword
+    # - util-linux
+    # looks to be binary packages only, but not 100% certain
+    column1 = columns.Ascii(db_field="column1", primary_key=True)
+    # looks unused
+    value = columns.Blob(db_field="value")
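One typing detail worth noting in these new models: `BugToCrashSignatures.key` is a `VarInt`, so lookups take the Launchpad bug number as a Python `int` — an encoded string would fail cqlengine's column validation, which is what the corrected `record_bug_for_bucket()` and `get_signatures_for_bug()` earlier rely on. A short sketch (bug number made up, connection assumed configured):

```python
from errortracker.cassandra_schema import BugToCrashSignatures

# VarInt key: query with the integer itself, not str(bug).encode()
rows = BugToCrashSignatures.objects.filter(key=2098765)
signatures = [row.column1 for row in rows]
```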
""" - day_key = time.strftime("%Y%m%d", time.gmtime()) + try: + # Try to get the actual day of that crash, otherwise fallback to today + day_key = time.strftime("%Y%m%d", time.strptime(insert_dict["Date"], "%c")) + except Exception: + day_key = time.strftime("%Y%m%d", time.gmtime()) now_uuid = uuid.uuid1() if ttl: diff --git a/src/tests/conftest.py b/src/tests/conftest.py index c4a198c..77cbf8a 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -6,8 +6,10 @@ """Test helpers for working with cassandra.""" +import locale import shutil import tempfile +from datetime import datetime, timedelta from pathlib import Path from unittest.mock import patch @@ -18,7 +20,7 @@ from errortracker import cassandra -@pytest.fixture(scope="function") +@pytest.fixture(scope="class") def temporary_db(): cassandra.KEYSPACE = "tmp" cassandra.REPLICATION_FACTOR = 1 @@ -27,7 +29,7 @@ def temporary_db(): management.drop_keyspace(cassandra.KEYSPACE) -@pytest.fixture(scope="function") +@pytest.fixture(scope="class") def retracer(temporary_db): temp = Path(tempfile.mkdtemp()) config_dir = temp / "config" @@ -45,3 +47,101 @@ def retracer(temporary_db): architecture=architecture, ) shutil.rmtree(temp) + + +@pytest.fixture(scope="module") +def datetime_now(): + return datetime.now() + + +@pytest.fixture(scope="class") +def cassandra_data(datetime_now, temporary_db): + import logging + + import bson + + from daisy.submit import submit + + # disable daisy logger temporarily + daisy_logger = logging.getLogger("daisy") + daisy_logger_level = daisy_logger.level + daisy_logger.setLevel(51) # CRITICAL is 50, so let's go higher + + # Make sure the datetime will get formatted "correctly" in that cursed time format: Mon May 5 14:46:10 2025 + locale.setlocale(locale.LC_ALL, "C.UTF-8") + + def count(): + counter = 0 + while True: + yield str(counter) + counter += 1 + + def new_oops(days_ago, data, systemid="imatestsystem"): + crash_date = datetime_now - timedelta(days=days_ago) + oops_date = crash_date.strftime("%c") + data.update({"Date": oops_date}) + bson_data = bson.encode(data) + request = type( + "Request", + (object,), + dict(data=bson_data, headers={"X-Whoopsie-Version": "0.2.81ubuntu~fakefortesting"}), + ) + submit(request, systemid) + + # Get a wide screen, because here we'll want to have compact data, meaning long lines π + # fmt: off + + # increase-rate package version 1 + for i in [30, 20, 10, 5, 2]: + new_oops(i, {"DistroRelease": "Ubuntu 24.04", "Package": "increase-rate 1", "ProblemType": "Crash", "Architecture": "amd64", "ExecutablePath": "/usr/bin/increase-rate", "StacktraceAddressSignature": "/usr/bin/increase-rate:42:/usr/bin/increase-rate+28"}) + + # increase-rate package version 2 + for i in [2, 2, 1, 1, 1, 0, 0, 0, 0]: + new_oops(i, {"DistroRelease": "Ubuntu 24.04", "Package": "increase-rate 2", "ProblemType": "Crash", "Architecture": "amd64", "ExecutablePath": "/usr/bin/increase-rate", "StacktraceAddressSignature": "/usr/bin/increase-rate:42:/usr/bin/increase-rate+fa0"}) + + # increase-rate package version 2 in proposed, even more crashes! 
diff --git a/src/tests/conftest.py b/src/tests/conftest.py
index c4a198c..77cbf8a 100644
--- a/src/tests/conftest.py
+++ b/src/tests/conftest.py
@@ -6,8 +6,10 @@
 """Test helpers for working with cassandra."""

+import locale
 import shutil
 import tempfile
+from datetime import datetime, timedelta
 from pathlib import Path
 from unittest.mock import patch

@@ -18,7 +20,7 @@
 from errortracker import cassandra


-@pytest.fixture(scope="function")
+@pytest.fixture(scope="class")
 def temporary_db():
     cassandra.KEYSPACE = "tmp"
     cassandra.REPLICATION_FACTOR = 1
@@ -27,7 +29,7 @@
     management.drop_keyspace(cassandra.KEYSPACE)


-@pytest.fixture(scope="function")
+@pytest.fixture(scope="class")
 def retracer(temporary_db):
     temp = Path(tempfile.mkdtemp())
     config_dir = temp / "config"
@@ -45,3 +47,101 @@
         architecture=architecture,
     )
     shutil.rmtree(temp)
+
+
+@pytest.fixture(scope="module")
+def datetime_now():
+    return datetime.now()
+
+
+@pytest.fixture(scope="class")
+def cassandra_data(datetime_now, temporary_db):
+    import logging
+
+    import bson
+
+    from daisy.submit import submit
+
+    # disable daisy logger temporarily
+    daisy_logger = logging.getLogger("daisy")
+    daisy_logger_level = daisy_logger.level
+    daisy_logger.setLevel(51)  # CRITICAL is 50, so let's go higher
+
+    # Make sure the datetime will get formatted "correctly" in that cursed time format: Mon May  5 14:46:10 2025
+    locale.setlocale(locale.LC_ALL, "C.UTF-8")
+
+    def count():
+        counter = 0
+        while True:
+            yield str(counter)
+            counter += 1
+
+    def new_oops(days_ago, data, systemid="imatestsystem"):
+        crash_date = datetime_now - timedelta(days=days_ago)
+        oops_date = crash_date.strftime("%c")
+        data.update({"Date": oops_date})
+        bson_data = bson.encode(data)
+        request = type(
+            "Request",
+            (object,),
+            dict(data=bson_data, headers={"X-Whoopsie-Version": "0.2.81ubuntu~fakefortesting"}),
+        )
+        submit(request, systemid)
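+    # Each synthetic report goes through daisy's real submit() entry point, so
+    # counters and buckets are populated by the production write path rather
+    # than by hand-written CQL (assumption: submit() treats this minimal fake
+    # Request object the same as a live whoopsie upload).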
"/usr/bin/all-proposed", "StacktraceAddressSignature": "/usr/bin/all-proposed:2:/usr/bin/all-proposed+20", "Tags": "package-from-proposed"}) + # fmt: on + + # re-enable daisy logger + daisy_logger.setLevel(daisy_logger_level) + + yield diff --git a/src/tests/test_cassie.py b/src/tests/test_cassie.py new file mode 100644 index 0000000..dce3b81 --- /dev/null +++ b/src/tests/test_cassie.py @@ -0,0 +1,136 @@ +from datetime import timedelta + +import numpy +from pytest import approx + +from errors import cassie + + +class TestCassie: + def test_get_package_crash_rate_increase_rate(self, datetime_now, cassandra_data): + now = datetime_now + + crash_rate = cassie.get_package_crash_rate( + "Ubuntu 24.04", + "increase-rate", + "1", + "2", + "70", + (now - timedelta(days=0)).strftime("%Y%m%d"), + "https://errors.internal/", + ) + assert crash_rate == approx( + { + "increase": True, + "difference": numpy.float64(4.3), + "web_link": "https://errors.internal/?release=Ubuntu%2024.04&package=increase-rate&version=2", + "previous_period_in_days": 30, + "previous_average": numpy.float64(0.7), + }, + rel=1e-1, # We don't want much precision, Cassandra is already messing up the values + ) + + crash_rate = cassie.get_package_crash_rate( + "Ubuntu 24.04", + "increase-rate", + "1", + "2", + "70", + (now - timedelta(days=0)).strftime("%Y%m%d"), + "https://errors.internal/", + True, + ) + assert crash_rate == approx( + { + "increase": True, + "difference": numpy.float64(3.4), + "web_link": "https://errors.internal/?release=Ubuntu%2024.04&package=increase-rate&version=2", + "previous_period_in_days": 30, + "previous_average": numpy.float64(0.7), + }, + rel=1e-1, # We don't want much precision, Cassandra is already messing up the values + ) + + def test_get_package_crash_rate_no_crashes_today(self, datetime_now, cassandra_data): + """Test case where new version has no crashes today - should return increase=False""" + now = datetime_now + + crash_rate = cassie.get_package_crash_rate( + "Ubuntu 24.04", + "no-crashes-today", + "1", + "2", + "100", + (now - timedelta(days=0)).strftime("%Y%m%d"), + "https://errors.internal/", + ) + assert crash_rate == {"increase": False} + + def test_get_package_crash_rate_few_crashes(self, datetime_now, cassandra_data): + """Test case where new version has only 2 crashes today (less than threshold of 3) - should return increase=False""" + now = datetime_now + + crash_rate = cassie.get_package_crash_rate( + "Ubuntu 24.04", + "few-crashes", + "1", + "2", + "100", + (now - timedelta(days=0)).strftime("%Y%m%d"), + "https://errors.internal/", + ) + assert crash_rate == {"increase": False} + + def test_get_package_crash_rate_new_package(self, datetime_now, cassandra_data): + """Test case where there's no old version data - should return increase=True with difference=today_crashes""" + now = datetime_now + + crash_rate = cassie.get_package_crash_rate( + "Ubuntu 24.04", + "new-package", + "0", # Old version that doesn't exist + "1", + "100", + (now - timedelta(days=0)).strftime("%Y%m%d"), + "https://errors.internal/", + ) + assert crash_rate == approx( + { + "increase": True, + "difference": 5, # Should equal the number of crashes today + "web_link": "https://errors.internal/?release=Ubuntu%2024.04&package=new-package&version=1", + "previous_average": None, + }, + rel=1e-1, + ) + + def test_get_package_crash_rate_low_difference(self, datetime_now, cassandra_data): + """Test case where crash rate is similar between versions (difference <= 1) - should return increase=False""" + now = 
+    def test_get_package_crash_rate_low_difference(self, datetime_now, cassandra_data):
+        """Test case where crash rate is similar between versions (difference <= 1) - should return increase=False"""
+        now = datetime_now
+
+        crash_rate = cassie.get_package_crash_rate(
+            "Ubuntu 24.04",
+            "low-difference",
+            "1",
+            "2",
+            "100",
+            (now - timedelta(days=0)).strftime("%Y%m%d"),
+            "https://errors.internal/",
+        )
+        assert crash_rate == {"increase": False}
+
+    def test_get_package_crash_rate_all_proposed(self, datetime_now, cassandra_data):
+        """Test case where all today's crashes are from proposed and we exclude proposed - should return increase=False"""
+        now = datetime_now
+
+        crash_rate = cassie.get_package_crash_rate(
+            "Ubuntu 24.04",
+            "all-proposed",
+            "1",
+            "2",
+            "100",
+            (now - timedelta(days=0)).strftime("%Y%m%d"),
+            "https://errors.internal/",
+            exclude_proposed=True,
+        )
+        assert crash_rate == {"increase": False}
diff --git a/src/tests/test_oopses.py b/src/tests/test_oopses.py
index 7dc886b..3ab9104 100644
--- a/src/tests/test_oopses.py
+++ b/src/tests/test_oopses.py
@@ -102,7 +102,7 @@ def _test_insert_check(self, oopsid, day_key, value=None):
         assert value == result["duration"]
         # The oops has been indexed by day
         oops_refs = cassandra_schema.DayOOPS.filter(key=day_key.encode()).only(["value"])
-        assert [oopsid] == [day_oops.value.decode() for day_oops in oops_refs]
+        assert oopsid in [day_oops.value.decode() for day_oops in oops_refs]
         # TODO - the aggregates for the OOPS have been updated.

     def test_insert_oops_dict(self, temporary_db):
@@ -124,12 +124,12 @@ def test_insert_updates_counters(self, temporary_db):
         day_key = oopses.insert_dict(oopsid, oops, user_token)

         oops_count = cassandra_schema.Counters.filter(key=b"oopses", column1=day_key)
-        assert [1] == [count.value for count in oops_count]
+        assert [3] == [count.value for count in oops_count]

         oopsid = str(uuid.uuid1())
         day_key = oopses.insert_dict(oopsid, oops, user_token)
         oops_count = cassandra_schema.Counters.filter(key=b"oopses", column1=day_key)
-        assert [2] == [count.value for count in oops_count]
+        assert [4] == [count.value for count in oops_count]


 class TestBucket: