diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..6b2253b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,52 @@ + +name: release + +on: + release: + types: [released] + +jobs: + build: + + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f requirements-release.txt ]; then pip install -r requirements-release.txt; fi + + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. + flake8 . 
--exclude tests --ignore E301,E302,E303,E305,W391 --count --exit-zero --max-complexity=10 --max-line-length=300 --statistics + + - name: Test with pytest & Collect coverage + run: | + pytest -v tests --cov=src.csvdiff3 --cov-report=term-missing --cov-report=html + + - name: Make coverage badge + if: ${{ matrix.python-version==3.9 }} + run: | + coverage-badge -o ./htmlcov/coverage.svg + + - name: Upload coverage-report to GitHub Pages + if: ${{ matrix.python-version==3.9 }} + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./htmlcov diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 0000000..26762e4 --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,40 @@ + +name: testing + +on: + pull_request: + branches: [main, develop] + +jobs: + build: + + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f requirements-testing.txt ]; then pip install -r requirements-testing.txt; fi + + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. + flake8 . 
--exclude tests --ignore E301,E302,E303,E305,W391 --count --exit-zero --max-complexity=10 --max-line-length=300 --statistics + + - name: Test with pytest & Collect coverage + run: | + pytest -v tests --cov=src.csvdiff3 --cov-report=term-missing diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d75c211 --- /dev/null +++ b/.gitignore @@ -0,0 +1,249 @@ + +# Created by https://www.toptal.com/developers/gitignore/api/python,venv,pycharm+all +# Edit at https://www.toptal.com/developers/gitignore?templates=python,venv,pycharm+all + +### PyCharm+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm+all Patch ### +# Ignores the whole .idea folder and all .iml files +# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 + +*.iml +modules.xml +.idea/misc.xml +*.ipr + +# Sonarlint plugin +.idea/sonarlint + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +### venv ### +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +pip-selfcheck.json + +# End of https://www.toptal.com/developers/gitignore/api/python,venv,pycharm+all diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..3609cf7 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.6.13 diff --git a/README.md b/README.md index dbbff90..8f8e120 100644 --- a/README.md +++ b/README.md @@ -1 +1,232 @@ -# csv-diff-python3 \ No newline at end of file + +# csv-diff-python3 + +[![Python Version](https://img.shields.io/badge/Python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)](README.md/#herb-requirements) +[![testing](https://github.com/blue-monk/csv-diff-python3/actions/workflows/testing.yml/badge.svg)](https://github.com/blue-monk/csv-diff-python3/actions/workflows/testing.yml) +[![coverage](https://github.com/blue-monk/csv-diff-python3/blob/gh-pages/coverage.svg)](https://blue-monk.github.io/csv-diff-python3/) +[![License](https://img.shields.io/github/license/blue-monk/csv-diff-python3)](LICENSE) + + +## :herb: Overview + +A simple command-line tool to see the difference between two CSV files. + +This tool reports in the following style, and you can choose how to report. + +1. Report the number of differences and line numbers +2. 
Report diff marks along with the contents of each CSV line + * You can choose the following report styles + * Horizontal (Side-by-side) display style + * Vertical display style + * You can choose to report only the lines with differences or all lines + + +--- +:palm_tree: DEMO + +![DEMO](appendix/csv-diff-animation.gif) + +--- + + +## :herb: Table of Contents + +* [**Why csv-diff?**](#herb-why-csv-diff) +* [**Features**](#herb-features) +* [**Requirements**](#herb-requirements) + * [Runtime](#runtime) + * [CSV files](#csv-files) +* [**Installation**](#herb-installation) + * [With pip](#with-pip) + * [Manual Installation](#manual-installation) +* [**Run**](#herb-run) + * [If installed with pip](#if-installed-with-pip) + * [If installed manually](#if-installed-manually) +* [**How to use**](#herb-how-to-use) + * [Get Help](#get-help) + * [One example](#one-example) +* [**Notices**](#herb-notices) +* [**Known Issues**](#herb-known-issues) +* [**Contributing**](#herb-contributing) + * [Reporting Bugs / Feature Requests](#reporting-bugs--feature-requests) +* [**License**](#herb-license) + + +## :herb: Why csv-diff? + +The `diff` command that compares files is unaware of key columns (like primary keys in a database). +Therefore, it may give undesired results in detecting differences in CSV files that have key columns. + +For example, consider comparing the contents of tables in two databases that are inaccessible to each other. +One way is to output each database's data as a CSV file and compare them. +In this case, the `diff` command does not pay attention to the key columns, so lines with different keys may be compared. +It is not possible to make an accurate judgment of the difference with the key in mind. + +This tool, on the other hand, recognizes key columns and detects differences. +Specify the key columns as an argument at the time of execution. You can get the comparison result you want. 
+ + +## :herb: Features + +* CSV delimiter, line feed character, presence/absence of header, etc. are automatically determined (can be specified) +* Make a comparison after matching with the key columns +* You can specify columns that are not compared +* Differences can be displayed side-by-side (more suitable when the number of columns is small) +* Differences can be displayed in vertical order (more suitable when the number of columns is large) +* Differences are indicated by the following marks, which we call DIFF-MARK + * `!`: There is a difference + * `<`: Exists only on the left side + * `>`: Exists only on the right side +* It is also possible to display only the number of differences and the line numbers with differences +* It is possible to compare one file with commas and one file with tabs +* Low memory consumption +* Only Python standard modules are used and the tool is provided as a single file, so it is easy to install even in an isolated environment + + +## :herb: Requirements + +### Runtime +* Python 3.6 or later + +### CSV files +* Must be sorted by key columns + + +## :herb: Installation + +### With pip + +```sh +pip install git+https://github.com/blue-monk/csv-diff-python3 +``` +It may be safer to install it in a virtual environment created with venv. + +### Manual installation + +Place `csvdiff.py` in any directory on the machine where Python 3 is installed. +It will be easier to use if you place it in a directory defined on PATH. + +## :herb: Run + +### If installed with pip + +```sh +$ csvdiff3 -h +``` + +### If installed manually + +```sh +$ python csvdiff.py -h +``` +or +```shell +$ chmod +x csvdiff.py +$ ./csvdiff.py -h +``` + +## :herb: How to use + +See the [Wiki](https://github.com/blue-monk/csv-diff-python3/wiki) for more details. 
+* [Wiki/Command](https://github.com/blue-monk/csv-diff-python3/wiki/Command) +* [Wiki/How to use](https://github.com/blue-monk/csv-diff-python3/wiki/How-to-use) + +### Get help +```sh +$ ./csvdiff.py -h +``` + +### One example + +Here is one example with the following sample data in `appendix/csv_samples/`. +See the [Wiki/How to use](https://github.com/blue-monk/csv-diff-python3/wiki/How-to-use) for more details. + +#### Sample data + +Suppose the keys are the 0th column and the 2nd column. + +* sample_lhs.csv + ```csv + head1, head2, head3, head4, head5 + key1-2, value1-2, key2-2, value2-2, 20201224T035908 + key1-3, value1-3, key2-3, value2-3, 20201224T180527 + key1-4, value1-4, key2-4, value2-4, 20201225T104851 + key1-5, value1-5, key2-5, value2-5, 20201225T142142 + ``` + +* sample_rhs.csv + ```csv + head1, head2, head3, head4, head5 + key1-1, value1-1, key2-1, value2-1, 20210108T142358 + key1-2, value1-3, key2-2, value2-z, 20210108T174216 + key1-4, value1-4, key2-4, value2-4, 20210109T090245 + key1-5, value1-v, key2-5, value2-5, 20210109T111231 + ``` + +#### Example of use + +To view the contents of different lines, Use the `-d` (`--show-difference-only`) option. +If you also want to see the number of differences, put the `-c` option (`--show-count`). + +```sh +$ ../../src/csvdiff3/csvdiff.py sample_lhs.csv sample_rhs.csv -k 0,2 -dc + +============ Report ============ + +* Differences +------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +sample_lhs.csv sample_rhs.csv Column indices with difference +------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['key1-1', 'value1-1', 'key2-1', 'value2-1', '20210108T142358'] +2 ['key1-2', 'value1-2', 'key2-2', 'value2-2', '20201224T035908'] ! 
3 ['key1-2', 'value1-3', 'key2-2', 'value2-z', '20210108T174216'] @ [1, 3, 4] +3 ['key1-3', 'value1-3', 'key2-3', 'value2-3', '20201224T180527'] < +4 ['key1-4', 'value1-4', 'key2-4', 'value2-4', '20201225T104851'] ! 4 ['key1-4', 'value1-4', 'key2-4', 'value2-4', '20210109T090245'] @ [4] +5 ['key1-5', 'value1-5', 'key2-5', 'value2-5', '20201225T142142'] ! 5 ['key1-5', 'value1-v', 'key2-5', 'value2-5', '20210109T111231'] @ [1, 4] + +* Count & Row number +same lines : 0 +left side only (<): 1 :-- Row Numbers -->: [3] +right side only (>): 1 :-- Row Numbers -->: [2] +with differences (!): 3 :-- Row Number Pairs -->: [(2, 3), (4, 4), (5, 5)] +``` +* Differences are indicated by the following DIFF-MARKs + * `!` : There is a difference + * `<` : Exists only on the left side + * `>` : Exists only on the right side + +* The number displayed before each CSV line data is the line number of the actual file + * line number is 1 based + +* For rows with differences, the column indices with differences will be displayed after `@` + * column index is 0 based + + +## :herb: Notices + +* *For large amounts of data* + + In the case of a horizontal report, + it takes longer than a vertical report because all lines are scanned in advance to collect information for report formatting. + For large amounts of data, consider vertical reports. + +## :herb: Known Issues + +* *Workaround for only one line* + + If the CSV file contains only one line, it will be recognized as a header. + You need to specify the option `-H n` to be recognized as CSV without a header. + + +## :herb: Contributing + +### Reporting Bugs / Feature Requests + +We welcome you to use the GitHub issue tracker to report bugs or suggest features. + + +## :herb: License + +csv-diff-python3 is released under the MIT license. Please read the [LICENSE](LICENSE) file for more information. 
+ + + diff --git a/appendix/csv-diff-animation.gif b/appendix/csv-diff-animation.gif new file mode 100644 index 0000000..958cabb Binary files /dev/null and b/appendix/csv-diff-animation.gif differ diff --git a/appendix/csv_samples/sample_lhs.csv b/appendix/csv_samples/sample_lhs.csv new file mode 100644 index 0000000..3258701 --- /dev/null +++ b/appendix/csv_samples/sample_lhs.csv @@ -0,0 +1,5 @@ +head1, head2, head3, head4, head5 +key1-2, value1-2, key2-2, value2-2, 20201224T035908 +key1-3, value1-3, key2-3, value2-3, 20201224T180527 +key1-4, value1-4, key2-4, value2-4, 20201225T104851 +key1-5, value1-5, key2-5, value2-5, 20201225T142142 diff --git a/appendix/csv_samples/sample_rhs.csv b/appendix/csv_samples/sample_rhs.csv new file mode 100644 index 0000000..401579a --- /dev/null +++ b/appendix/csv_samples/sample_rhs.csv @@ -0,0 +1,5 @@ +head1, head2, head3, head4, head5 +key1-1, value1-1, key2-1, value2-1, 20210108T142358 +key1-2, value1-3, key2-2, value2-z, 20210108T174216 +key1-4, value1-4, key2-4, value2-4, 20210109T090245 +key1-5, value1-v, key2-5, value2-5, 20210109T111231 diff --git a/requirements-release.txt b/requirements-release.txt new file mode 100644 index 0000000..f80cd21 --- /dev/null +++ b/requirements-release.txt @@ -0,0 +1,4 @@ +flake8~=3.9.2 +pytest~=6.2.5 +pytest-cov~=3.0.0 +coverage-badge~=1.0.2 diff --git a/requirements-testing.txt b/requirements-testing.txt new file mode 100644 index 0000000..bc58fc7 --- /dev/null +++ b/requirements-testing.txt @@ -0,0 +1,3 @@ +flake8~=3.9.2 +pytest~=6.2.5 +pytest-cov~=3.0.0 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..17fe835 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[metadata] +name = csv-diff-python3-blue-monk +version = 1.0.0 +author = blue-monk +author_email = blue.monk.487@gmail.com +description = A simple command-line tool to see the difference between two CSV files. 
+long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/blue-monk/csv-diff-python3 +project_urls = + Bug Tracker = https://github.com/blue-monk/csv-diff-python3/issues +classifiers = + Programming Language :: Python :: 3 + License :: OSI Approved :: MIT License + Operating System :: OS Independent + +[options] +package_dir = + = src +packages = find: +python_requires =>=3.6 + +[options.packages.find] +where = src diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b6af001 --- /dev/null +++ b/setup.py @@ -0,0 +1,14 @@ +from setuptools import setup + +setup( + name="csv-diff-python3-blue-monk", + install_requires=[ + ], + extras_require={ + }, + entry_points={ + 'console_scripts': [ + 'csvdiff3=csvdiff3.csvdiff:main', + ], + }, +) diff --git a/src/csvdiff3/__init__.py b/src/csvdiff3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/csvdiff3/csvdiff.py b/src/csvdiff3/csvdiff.py new file mode 100644 index 0000000..e78ab55 --- /dev/null +++ b/src/csvdiff3/csvdiff.py @@ -0,0 +1,1369 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import abc +import binascii +import csv +import functools +import logging +import os +import sys +import time +import traceback +import unicodedata +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from logging import Logger + + +# ---------------------------------------------------------------------------------------------------------------------- +# Decorators +# ---------------------------------------------------------------------------------------------------------------------- + +def show_execution_time(): + + def _execution_time(func): + + def wrapper(*args, **kwargs): + + start = time.perf_counter() + + func(*args, **kwargs) + + elapsed_time = time.perf_counter() - start + print() + print(f'elapsed_time={elapsed_time}[sec]') + print() + + return wrapper + + return _execution_time + + +def spacing_before(number_of_lines): + 
+ number_of_lines = number_of_lines or 1 + + def _spacing_before(func): + + def wrapper(*args, **kwargs): + + for i in range(number_of_lines): + print('') + + func(*args, **kwargs) + + return wrapper + + return _spacing_before + + +# ---------------------------------------------------------------------------------------------------------------------- +# Entrance +# ---------------------------------------------------------------------------------------------------------------------- + +# @show_execution_time() +def main(): + + configure() + + context = context_from_arguments() + show_context_for_debugging(context) + + try: + run_in(context) + except IndexError as e: + logger.error(f'It is possible that the number of columns in the row is not aligned. Please check the csv data. If not, please file an issue. [{type(e)}, description={e}]') + sys.exit(1) + + +class App(type): + + NAME = 'csv-diff-python3@blue-monk' + VERSION = '1.0.0' + + +class LoggingConfig(type): + + # For debug, play with the CONSOLE_LEVEL or FILE_LEVEL. 
+ + BASE_LEVEL = logging.DEBUG + + CONSOLE_LEVEL = logging.ERROR + CONSOLE_FORMAT = '%(levelname)s: %(message)s' + + FILE_LEVEL = logging.WARNING + FILE_FORMAT = '%(asctime)s: %(levelname)s: %(message)s' + FILE_PATH = 'csvdiff.log' + + +logger: Logger = logging.getLogger(__name__) + + +def configure(): + + logging.basicConfig(level=LoggingConfig.BASE_LEVEL) + + stream_handler = logging.StreamHandler() + stream_handler.setLevel(LoggingConfig.CONSOLE_LEVEL) + stream_handler.setFormatter(logging.Formatter(LoggingConfig.CONSOLE_FORMAT)) + + file_handler = logging.FileHandler(filename=LoggingConfig.FILE_PATH, mode='w') + file_handler.setLevel(LoggingConfig.FILE_LEVEL) + file_handler.setFormatter(logging.Formatter(LoggingConfig.FILE_FORMAT)) + + logger.addHandler(stream_handler) + logger.addHandler(file_handler) + + logger.propagate = False + + +# ---------------------------------------------------------------------------------------------------------------------- +# Context Preparation +# ---------------------------------------------------------------------------------------------------------------------- + +def context_from_arguments(): + + def arg_type_matching_key_in_csv(x): + return list(map(MatchingKeyInfo, x.split(','))) + + def arg_type_int_in_csv(x): + return list(map(int, x.split(','))) + + + parser = ArgumentParser(prog=App.NAME, formatter_class=ArgumentDefaultsHelpFormatter) + + # Program name & Version ------------------------------------------------------------------------------------------- + parser.add_argument('--version', action='version', version=f'%(prog)s {App.VERSION}') + + # Input CSV file paths --------------------------------------------------------------------------------------------- + parser.add_argument('lhs_file_name', type=str, help='Absolute/Relative path to left-hand side file.') + parser.add_argument('rhs_file_name', type=str, help='Absolute/Relative path to right-hand side file.') + + # Input CSV file encodings 
----------------------------------------------------------------------------------------- + parser.add_argument('-e', '--encoding', type=str, default=None, + help='Encoding of the CSV files. (refer public reference named "Standard encoding") e.g.: shift_jis') + + parser.add_argument('--encoding-for-lhs', type=str, default='utf8', + help='Encoding of the CSV file on the left side. (refer public reference named "Standard encoding") e.g.: shift_jis') + parser.add_argument('--encoding-for-rhs', type=str, default='utf8', + help='Encoding of the CSV file on the right side. (refer public reference named "Standard encoding") e.g.: shift_jis') + + # Matching conditions ---------------------------------------------------------------------------------------------- + parser.add_argument('-k', '--matching-keys', type=arg_type_matching_key_in_csv, default='0', + help='Matching key indices(from 0) for Input CSV in CSV format. For non-fixed length numbers, specify the number of digits after ":". e.g.: 0:8,3') + parser.add_argument('-u', '--unique-key', default=False, action='store_true', + help="Specify if the matching key is unique. Then, if it detects that the matching key is not unique, an error will occur.") + parser.add_argument('-i', '--ignore-columns', type=arg_type_int_in_csv, default=[], + help='Specify the index of the column to be ignored in CSV format. e.g.: 3,7') + + # Report styles ---------------------------------------------------------------------------------------------------- + parser.add_argument('-v', '--vertical-style', default=False, action='store_true', + help='Report in vertical style. If not specified, report in horizontal(two facing) style.') + + parser.add_argument('-c', '--show-count', default=False, action='store_true', + help='Report the number of differences. 
Treat this as True if neither -d nor -a is specified.') + + display_group = parser.add_mutually_exclusive_group() + display_group.add_argument('-d', '--show-difference-only', default=False, action='store_true', + help='Report the lines with the difference. Can be used with option -c. Cannot be used with option -a.') + display_group.add_argument('-a', '--show-all-lines', action='store_true', + help='Report on all lines. Can be used with option -c. Cannot be used with option -d.') + + parser.add_argument('-x', '--show-context-from-arguments', default=False, action='store_true', + help='Report the context generated from the arguments and CSV sniffing.') + + # CSV analysis conditions ------------------------------------------------------------------------------------------ + parser.add_argument('-H', '--header', type=str, default=None, choices=['n', 'y'], + help='If specified, this specification will be enforced.') + + parser.add_argument('-S', '--sniffing-size', type=int, default=4096, + help="If csv sniffing fails, try specifying a size larger than 4096. Or Explicitly specify CSV file conditions like '--column-separator-for-lhs TAB'. 
Check help with -h option.") + + parser.add_argument('-F', '--force-individual-specs', action='store_true', + help="If you don't want to rely on csv sniffing, specify it, and then specify --column-separator and so on separately.") + + parser.add_argument('--column-separator', type=str, default=None, choices=['COMMA', 'TAB', 'SEMICOLON'], + help='Process both sides CSV file using the specified column delimiter.') + + parser.add_argument('--line-separator', type=str, default=None, choices=['LF', 'CRLF'], + help='Process both sides CSV file using the specified line separator.') + + parser.add_argument('--quote-char', type=str, default=None, choices=['"', "'"], + help='Process both sides CSV file using the specified quote character.') + + parser.add_argument('--no-skip-space-after-column-separator', action='store_true', + help='Specify when you want to treat the space immediately after the separator as data for the both sides CSV file.') + + # CSV analysis conditions by left and right ------------------------------------------------------------------------ + parser.add_argument('--column-separator-for-lhs', type=str, default="COMMA", choices=['COMMA', 'TAB', 'SEMICOLON'], + help='Process left-hand side CSV file using the specified column delimiter.') + + parser.add_argument('--column-separator-for-rhs', type=str, default="COMMA", choices=['COMMA', 'TAB', 'SEMICOLON'], + help='Process right-hand side CSV file using the specified column delimiter.') + + parser.add_argument('--line-separator-for-lhs', type=str, default="LF", choices=['LF', 'CRLF'], + help='Process left-hand side CSV file using the specified line separator.') + + parser.add_argument('--line-separator-for-rhs', type=str, default="LF", choices=['LF', 'CRLF'], + help='Process right-hand side CSV file using the specified line separator.') + + parser.add_argument('--quote-char-for-lhs', type=str, default='"', choices=['"', "'"], + help='Process left-hand side CSV file using the specified quote character.') + + 
class Context:
    """Settings for one comparison run, built from the parsed command-line arguments.

    The constructor copies the argument values into attributes, resolves the
    common versus per-side (``*_for_lhs`` / ``*_for_rhs``) CSV options, checks
    that both input paths are existing files, and enables the count report when
    no report type was requested.
    """

    # Symbolic names mapped to the characters they stand for.  The ``None`` key
    # lets an option whose value is ``None`` resolve to an empty string.
    LINE_SEPARATOR_s = {
        "CR": '\r',
        "LF": '\n',
        "CRLF": '\r\n',
        None: '',
    }

    COLUMN_SEPARATOR_s = {
        "COMMA": ',',
        "TAB": '\t',
        "SEMICOLON": ';',
        None: '',
    }

    def __init__(self, args):
        """Populate the context from ``args``, then validate and normalize it.

        Parameters
        ----------
        args
            The namespace produced by ``parser.parse_args()``.

        Notes
        -----
        Calls ``sys.exit(1)`` (through ``_validate``) when either input path
        does not exist or is not a regular file.
        """

        # Input CSV file paths
        self.lhs_file_name = args.lhs_file_name
        self.rhs_file_name = args.rhs_file_name
        self.lhs_file_path = os.path.abspath(args.lhs_file_name)
        self.rhs_file_path = os.path.abspath(args.rhs_file_name)

        # Input CSV file encodings: a common encoding, when given, wins over the per-side ones.
        if args.encoding:
            self.encoding_for_lhs = args.encoding
            self.encoding_for_rhs = args.encoding
        else:
            self.encoding_for_lhs = args.encoding_for_lhs
            self.encoding_for_rhs = args.encoding_for_rhs

        # Matching conditions
        self.matching_key_codec = MatchingKeyCodec(args.matching_keys)
        self.key_should_be_unique = args.unique_key
        self.column_indices_to_ignore = args.ignore_columns

        # Report styles
        self.reports_in_vertical_style = args.vertical_style
        self.reports_in_horizontal_style = not args.vertical_style

        self.shows_count = args.show_count
        self.shows_difference_only = args.show_difference_only
        self.shows_all_lines = args.show_all_lines
        self.shows_details = bool(self.shows_difference_only or self.shows_all_lines)
        self.shows_context_from_arguments = args.show_context_from_arguments

        # Only the horizontal detail report pads its columns.
        self.needs_size_info_for_padding = self.shows_details and self.reports_in_horizontal_style

        # CSV analysis conditions
        self.header = args.header
        self.first_row_is_header = None  # decided later, once the dialect is fixed

        self.sniffing_size = args.sniffing_size

        self.forces_individual_specs = args.force_individual_specs

        # For each CSV option below: when specs are forced and the common option
        # is given, it applies to both sides; otherwise the per-side options apply.
        if self.forces_individual_specs and args.column_separator:
            self.column_separator_for_lhs = self.COLUMN_SEPARATOR_s[args.column_separator]
            self.column_separator_for_rhs = self.COLUMN_SEPARATOR_s[args.column_separator]
        else:
            self.column_separator_for_lhs = self.COLUMN_SEPARATOR_s[args.column_separator_for_lhs]
            self.column_separator_for_rhs = self.COLUMN_SEPARATOR_s[args.column_separator_for_rhs]

        if self.forces_individual_specs and args.line_separator:
            self.line_separator_for_lhs = self.LINE_SEPARATOR_s[args.line_separator]
            self.line_separator_for_rhs = self.LINE_SEPARATOR_s[args.line_separator]
        else:
            self.line_separator_for_lhs = self.LINE_SEPARATOR_s[args.line_separator_for_lhs]
            self.line_separator_for_rhs = self.LINE_SEPARATOR_s[args.line_separator_for_rhs]

        if self.forces_individual_specs and args.quote_char:
            self.quote_char_for_lhs = args.quote_char
            self.quote_char_for_rhs = args.quote_char
        else:
            self.quote_char_for_lhs = args.quote_char_for_lhs
            self.quote_char_for_rhs = args.quote_char_for_rhs

        if self.forces_individual_specs and args.no_skip_space_after_column_separator:
            # The common "no skip" flag is set, so neither side skips the space.
            self.skips_space_after_column_separator_for_lhs = False
            self.skips_space_after_column_separator_for_rhs = False
        else:
            # Honor the per-side flags.  They default to False, so with no flag
            # given both sides keep skipping the space, exactly as before.
            self.skips_space_after_column_separator_for_lhs = not args.no_skip_space_after_column_separator_for_lhs
            self.skips_space_after_column_separator_for_rhs = not args.no_skip_space_after_column_separator_for_rhs

        self._validate()
        self._normalize()

    def _validate(self):
        """Log an error and exit with status 1 unless both inputs are existing regular files."""

        if not os.path.exists(self.lhs_file_path):
            logger.error(f'lhs_file_path not exists. [lhs_file_path={self.lhs_file_path}]')
            sys.exit(1)
        if not os.path.exists(self.rhs_file_path):
            logger.error(f'rhs_file_path not exists. [rhs_file_path={self.rhs_file_path}]')
            sys.exit(1)

        if not os.path.isfile(self.lhs_file_path):
            logger.error(f'lhs_file_path is not a file. [lhs_file_path={self.lhs_file_path}]')
            sys.exit(1)
        if not os.path.isfile(self.rhs_file_path):
            logger.error(f'rhs_file_path is not a file. [rhs_file_path={self.rhs_file_path}]')
            sys.exit(1)

    def _normalize(self):
        """Fall back to the count report when no report type was requested."""

        if not any([self.shows_count, self.shows_difference_only, self.shows_all_lines]):
            self.shows_count = True

    def display_string_for_column_separator(self, value):
        """Return the symbolic name of a column separator character.

        Returns the first key of ``COLUMN_SEPARATOR_s`` whose value equals
        ``value``, or ``'undefined(<value>)'`` when no key matches.
        """

        candidates = [k for k, v in self.COLUMN_SEPARATOR_s.items() if v == value]
        if candidates:
            return candidates[0]
        # The fallback text was previously built but never returned.
        return f'undefined({value})'

    def display_string_for_line_separator(self, value, file_arrangement):
        """Return ``value`` as a hex string, encoded with that side's file encoding.

        ``file_arrangement`` is the attribute suffix (``'_for_lhs'`` or
        ``'_for_rhs'``) used to look up ``encoding_for_lhs`` / ``encoding_for_rhs``.
        """

        encoding_value = getattr(self, "encoding" + file_arrangement)
        return binascii.hexlify(value.encode(encoding_value)).decode()
class MatchingKeyInfo:
    """One matching-key column, parsed from a ``index[:max_length]`` string.

    ``index`` is the column position in a row.  ``max_length`` is the width the
    key value is left-padded to with ``'0'``; it is 0 when omitted.
    """

    def __init__(self, specified_string):
        """Parse ``specified_string``.

        Empty parts produced by splitting on ``':'`` are ignored.  A missing or
        non-numeric index, or a non-numeric max_length, is logged as an error
        and the process exits with status 1.
        """

        elements = [element for element in specified_string.split(':') if element != '']

        # An empty spec goes through the same "should be a number" error path
        # instead of raising IndexError from pop() on an empty list.
        index = elements.pop(0) if elements else ''
        self.index = self._transform_into_numeric(index, 'index')

        max_length = elements.pop(0) if elements else '0'
        self.max_length = self._transform_into_numeric(max_length, 'max_length')

    def __repr__(self):
        return f"{self.__class__.__name__}({self.index!r}, {(self.max_length if self.max_length > 0 else '')!r})"

    @classmethod
    def _transform_into_numeric(cls, value, name):
        """Return ``value`` as an int, or log an error and exit with status 1."""

        try:
            if not value.isdigit():
                raise ValueError(value)
            # str.isdigit() accepts characters such as superscript digits that
            # int() rejects, so the conversion is guarded as well.
            return int(value)
        except ValueError:
            logger.error(f'MATCHING_KEY_INDICES should be a number. See also help. [specified {name}={value}]')
            # sys.exit, as used elsewhere in this file, does not depend on the
            # site module the way the builtin exit() does.
            sys.exit(1)

    def key_for(self, row):
        """Return this column's value from ``row``, left-padded with ``'0'`` to ``max_length``."""
        return row[self.index].rjust(self.max_length, '0')
def run_in(context):
    """Compare the two CSV files described by ``context`` and print the report.

    Both files stay open for the whole run.  The dialect of each side is fixed
    first (the left one, then the right one, each of which may adjust the
    context), the reader is pre-scanned and rewound, and only then is the
    difference detection started.
    """

    with open(context.lhs_file_path, mode='r', encoding=context.encoding_for_lhs) as lhs_csv:
        with open(context.rhs_file_path, mode='r', encoding=context.encoding_for_rhs) as rhs_csv:

            lhs_dialect, context_after_lhs = CsvDialectFixer.fixed_dialect(
                context, lhs_csv, FileArrangement.LHS)
            rhs_dialect, final_context = CsvDialectFixer.fixed_dialect(
                context_after_lhs, rhs_csv, FileArrangement.RHS)

            reader = CsvReader(lhs_csv, rhs_csv, lhs_dialect, rhs_dialect, final_context)

            # The pre-scan consumes rows, so the reader is reset before the real pass.
            scan_result = PreScanner.scan(final_context, reader)
            reader.reset()

            detect_diff(final_context, reader, scan_result)
def perform_key_matching(csv_reader, callback_for_lhs_only, callback_for_both_sides, callback_for_rhs_only):
    """Walk both sides in key order and dispatch each row to the matching callback.

    One row is held per side.  The side with the smaller key is reported as
    existing on that side only and advanced; equal keys are reported as existing
    on both sides and both sides advance.  The walk ends when both sides have
    reached ``MatchingKeyCodec.END_of_KEY``.
    """

    end_of_key = MatchingKeyCodec.END_of_KEY

    left = csv_reader.read_lhs()
    right = csv_reader.read_rhs()

    while not (left.lhs_key == end_of_key and right.rhs_key == end_of_key):

        if left.lhs_key == right.rhs_key:
            callback_for_both_sides(left, right)
            left = csv_reader.read_lhs()
            right = csv_reader.read_rhs()
            continue

        if left.lhs_key < right.rhs_key:
            callback_for_lhs_only(left)
            left = csv_reader.read_lhs()
            continue

        if left.lhs_key > right.rhs_key:
            callback_for_rhs_only(right)
            right = csv_reader.read_rhs()
class ValueDifferenceDetector:
    """Finds the columns whose values differ between two rows with the same key.

    Matching-key columns and explicitly ignored columns are excluded from the
    comparison.
    """

    class ValueDifferenceResult:
        """Outcome of comparing one pair of rows."""

        def __init__(self, different_column_indices):
            self.different_column_indices = different_column_indices

        @property
        def has_difference(self):
            """True when at least one compared column differs."""
            return bool(self.different_column_indices)

    def __init__(self, number_of_columns, matching_key_indices, ignore_column_indices):
        """Work out which column indices take part in the comparison.

        Parameters
        ----------
        number_of_columns
            Columns ``0 .. number_of_columns - 1`` are candidates.
        matching_key_indices
            Indices of the key columns; never compared.
        ignore_column_indices
            Indices to leave out of the comparison.  ``None`` is treated as
            "nothing to ignore" (NOTE(review): the option's default is not
            visible here; confirm whether ``None`` can actually arrive).
        """

        self.column_indices = range(0, number_of_columns)
        logger.debug(f'column_indices={self.column_indices}')

        self.target_column_indices = (set(self.column_indices)
                                      - set(matching_key_indices)
                                      - set(ignore_column_indices or ()))
        logger.debug(f'target_column_indices={self.target_column_indices}')

    def detect_difference_between(self, lhs_row, rhs_row):
        """Return a ``ValueDifferenceResult`` listing the differing columns in ascending order."""

        # Set iteration order is not ascending in general (for example {1, 8}
        # can iterate as 8, 1), so the indices are sorted to keep the report stable.
        different_column_indices = sorted(index for index in self.target_column_indices
                                          if lhs_row[index] != rhs_row[index])
        logger.debug(f'different_column_indices={different_column_indices}')
        return self.ValueDifferenceResult(different_column_indices)
cls.SizeInfoForPadding(lhs_max_row_number, lhs_max_row_length, rhs_max_row_number, rhs_max_row_length) + return PreScanner.ScanResult(number_of_columns, size_info_for_padding) + + + class SizeInfoForPadding: + + def __init__(self, lhs_max_row_number, lhs_max_row_length, rhs_max_row_number, rhs_max_row_length): + self.lhs_max_row_number = lhs_max_row_number + self.lhs_max_row_length = lhs_max_row_length + self.rhs_max_row_number = rhs_max_row_number + self.rhs_max_row_length = rhs_max_row_length + + + def __init__(self): + pass + + @classmethod + def scan(cls, context, csv_reader): + + if context.needs_size_info_for_padding: + return PreScanner._scan_deeply(csv_reader) + else: + return PreScanner._scan_lightly(csv_reader) + + + @classmethod + def _scan_deeply(cls, csv_reader): + """ + Notes + ----- + Purpose of deep pre-scanning + * Determine the number of columns for value difference detection + * Get size information to format the horizontal report + """ + + start_ = time.perf_counter() + + lhs_max_row_length, rhs_max_row_length = 0, 0 + + lhs_fact = csv_reader.read_lhs() + rhs_fact = csv_reader.read_rhs() + + number_of_columns = cls._determine_number_of_columns_from(lhs_fact, rhs_fact) + + while lhs_fact.lhs_key != MatchingKeyCodec.END_of_KEY: + lhs_max_row_length = max(lhs_max_row_length, UnicodeSupport.string_length_considering_east_asian_characters_of(str(lhs_fact.lhs_row))) + lhs_fact = csv_reader.read_lhs() + + while rhs_fact.rhs_key != MatchingKeyCodec.END_of_KEY: + rhs_max_row_length = max(rhs_max_row_length, UnicodeSupport.string_length_considering_east_asian_characters_of(str(rhs_fact.rhs_row))) + rhs_fact = csv_reader.read_rhs() + + lhs_max_row_number = csv_reader.lhs_csv_state.row_number + rhs_max_row_number = csv_reader.rhs_csv_state.row_number + logger.debug(f'lhs_max_row_number={lhs_max_row_number}') + logger.debug(f'rhs_max_row_number={rhs_max_row_number}') + + elapsed_time_ = time.perf_counter() - start_ + logger.debug(f'PreScanner#scan() 
class Mark:
    """Single-character marks the reporters print to classify a row.

    This is a plain namespace of constants.  It no longer derives from ``type``,
    which made it a metaclass although it is only read through attribute access.
    """

    LHS_ONLY = '<'            # row exists only on the left-hand side
    RHS_ONLY = '>'            # row exists only on the right-hand side
    HAS_DIFF = '!'            # row exists on both sides with differing values
    NON_DIFF = ' '            # row exists on both sides without differences
    NON_DIFF_EXPRESSLY = '='  # same as NON_DIFF, shown as an explicit mark
class DetailReporter(metaclass=abc.ABCMeta):
    """Abstract base of the reporters that print the per-row detail section.

    Subclasses supply the file-name heading and the three per-row report
    methods.  The metaclass is declared with Python 3 syntax; the former
    ``__metaclass__`` attribute is ignored by Python 3, which left the
    ``@abc.abstractmethod`` markers unenforced.
    """

    def __init__(self, context):
        self.cxt = context

    def report_detail_heading(self):
        """Print the heading of the detail section, unless details are not shown."""

        if not self.cxt.shows_details:
            return

        self._report_content_heading()
        self._report_file_name()

    # NOTE(review): spacing_before is defined outside this chunk; presumably it
    # emits blank lines before the decorated call.  Confirm against its definition.
    @spacing_before(1)
    def _report_content_heading(self):
        """Print the title matching the selected detail mode."""
        if self.cxt.shows_difference_only:
            print('くろまる Differences')
        elif self.cxt.shows_all_lines:
            print('くろまる All')

    @abc.abstractmethod
    def _report_file_name(self):
        """Print the heading that names the two compared files."""
        raise NotImplementedError()

    @abc.abstractmethod
    def report_case_of_existed_only_on_lhs(self, lhs_fact):
        """Report a row whose key exists only on the left-hand side."""
        raise NotImplementedError()

    @abc.abstractmethod
    def report_case_of_existed_on_both_sides(self, lhs_fact, rhs_fact, value_difference_result):
        """Report a pair of rows that share the same key."""
        raise NotImplementedError()

    @abc.abstractmethod
    def report_case_of_existed_only_on_rhs(self, rhs_fact):
        """Report a row whose key exists only on the right-hand side."""
        raise NotImplementedError()

    class Factory:
        """Chooses the concrete reporter for the configured report style."""

        def __init__(self):
            pass

        @staticmethod
        def reporter_for(context, scan_result):
            """Return a VerticalReporter or a HorizontalReporter according to ``context``."""

            if context.reports_in_vertical_style:
                return VerticalReporter(context, scan_result)
            else:
                return HorizontalReporter(context, scan_result)
self.lhs_length) + diff_mark_spacing = ' ' * self.diff_mark_length + rhs_file_name = UnicodeSupport.left_justified(rhs_file_name, self.rhs_length) + prefix_length_spacing = ' ' * self.prefix_length_for_diff_columns_displays + return f'{lhs_file_name}{diff_mark_spacing}{rhs_file_name}{prefix_length_spacing}{self.DIFFERENT_COLUMN_GUIDE}' + + + # --- left-hand side related description --- + + def lhs_only_description(self, lhs_fact): + + lhs = self._lhs_description(lhs_fact) + diff_mark_area = (' ' * self.diff_mark_filler_length_in_front) + Mark.LHS_ONLY + (' ' * self.diff_mark_filler_length_in_rear) + return f'{lhs}{diff_mark_area}' + + def _lhs_description(self, lhs_fact): + + lhs_row_number = UnicodeSupport.right_justified(str(lhs_fact.lhs_row_number), self.lhs_max_row_number_length) + spacing = ' ' * self.lhs_filler_length + lhs_row = UnicodeSupport.left_justified(str(lhs_fact.lhs_row), self.lhs_max_row_length) + return f'{lhs_row_number}{spacing}{lhs_row}' + + def _lhs_empty_description(self): + return ' ' * (self.lhs_max_row_number_length + self.lhs_filler_length + self.lhs_max_row_length) + + + # --- right-hand side related description --- + + def rhs_only_description(self, rhs_fact): + + empty_lhs = self._lhs_empty_description() + diff_mark_area = (' ' * self.diff_mark_filler_length_in_front) + Mark.RHS_ONLY + (' ' * self.diff_mark_filler_length_in_rear) + rhs = self._rhs_description(rhs_fact) + return f'{empty_lhs}{diff_mark_area}{rhs}' + + def _rhs_description(self, rhs_fact): + + rhs_row_number = UnicodeSupport.right_justified(str(rhs_fact.rhs_row_number), self.rhs_max_row_number_length) + spacing = ' ' * self.rhs_filler_length + rhs_row = UnicodeSupport.left_justified(str(rhs_fact.rhs_row), self.rhs_max_row_length) + return f'{rhs_row_number}{spacing}{rhs_row}' + + + # --- both sides related description --- + + def both_description(self, lhs_fact, rhs_fact, value_difference_result): + + lhs = self._lhs_description(lhs_fact) + diff_mark = Mark.HAS_DIFF if 
value_difference_result.has_difference else Mark.NON_DIFF + diff_mark_area = (' ' * self.diff_mark_filler_length_in_front) + diff_mark + (' ' * self.diff_mark_filler_length_in_rear) + rhs = self._rhs_description(rhs_fact) + prefix_of_diff_columns = self.PREFIX_of_DIFF_COLUMNS if value_difference_result.has_difference else '' + different_columns = str(value_difference_result.different_column_indices) if value_difference_result.has_difference else '' + return f'{lhs}{diff_mark_area}{rhs}{prefix_of_diff_columns}{different_columns}' + + + def __init__(self, context, scan_result): + + super(HorizontalReporter, self).__init__(context) + self.cxt = context + + if context.needs_size_info_for_padding: + size_info = scan_result.size_info_for_padding + self.template = HorizontalReporter.Template(len(str(size_info.lhs_max_row_number)), + size_info.lhs_max_row_length, + len(str(size_info.rhs_max_row_number)), + size_info.rhs_max_row_length) + else: + self.template = None + + + # --- report heading related --- + + def _report_file_name(self): + + print(self.template.division_string()) + print(self.template.file_name_description(os.path.basename(self.cxt.lhs_file_name), os.path.basename(self.cxt.rhs_file_name))) + print(self.template.division_string()) + + + # --- report each cases --- + + def report_case_of_existed_only_on_lhs(self, lhs_fact): + + if self.cxt.shows_details: + print(self.template.lhs_only_description(lhs_fact)) + + def report_case_of_existed_on_both_sides(self, lhs_fact, rhs_fact, value_difference_result): + + if (self.cxt.shows_difference_only and value_difference_result.has_difference) or self.cxt.shows_all_lines: + print(self.template.both_description(lhs_fact, rhs_fact, value_difference_result)) + + def report_case_of_existed_only_on_rhs(self, rhs_fact): + + if self.cxt.shows_details: + print(self.template.rhs_only_description(rhs_fact)) + + +class VerticalReporter(DetailReporter): + + class Template: + + LHS_MARK = 'L' + RHS_MARK = 'R' + 
PREFIX_of_DIFF_COLUMNS = '@' + + def __init__(self): + pass + + + # --- heading-related description --- + + @classmethod + def division_string(cls): + return '-' * 80 + + @classmethod + def file_name_description(cls, mark, file_name): + return f'{mark} {file_name}' + + + # --- left-hand side related description --- + + @classmethod + def lhs_only_description(cls, lhs_fact): + return f'{Mark.LHS_ONLY} {cls.LHS_MARK} {str(lhs_fact.lhs_row_number)} {str(lhs_fact.lhs_row)}' + + + # --- right-hand side related description --- + + @classmethod + def rhs_only_description(cls, rhs_fact): + return f'{Mark.RHS_ONLY} {cls.RHS_MARK} {str(rhs_fact.rhs_row_number)} {str(rhs_fact.rhs_row)}' + + + # --- both sides related description --- + + @classmethod + def both_description_heading(cls, value_difference_result): + + if value_difference_result.has_difference: + return f'{Mark.HAS_DIFF} {cls.PREFIX_of_DIFF_COLUMNS} {str(value_difference_result.different_column_indices)}' + else: + return Mark.NON_DIFF_EXPRESSLY + + @classmethod + def both_description_lhs(cls, lhs_fact, row_number_length): + return f' {cls.LHS_MARK} {str(lhs_fact.lhs_row_number).rjust(row_number_length)} {str(lhs_fact.lhs_row)}' + + @classmethod + def both_description_rhs(cls, rhs_fact, row_number_length): + return f' {cls.RHS_MARK} {str(rhs_fact.rhs_row_number).rjust(row_number_length)} {str(rhs_fact.rhs_row)}' + + + + def __init__(self, context, _): + + super(VerticalReporter, self).__init__(context) + self.cxt = context + self.template = VerticalReporter.Template() + + + # --- report heading related --- + + def _report_file_name(self): + + print(self.template.division_string()) + print(self.template.file_name_description(self.template.LHS_MARK, os.path.basename(self.cxt.lhs_file_name))) + print(self.template.file_name_description(self.template.RHS_MARK, os.path.basename(self.cxt.rhs_file_name))) + print(self.template.division_string()) + + + # --- report each cases --- + + def 
report_case_of_existed_only_on_lhs(self, lhs_fact): + + if self.cxt.shows_details: + print(self.template.lhs_only_description(lhs_fact)) + + def report_case_of_existed_on_both_sides(self, lhs_fact, rhs_fact, value_difference_result): + + if (self.cxt.shows_difference_only and value_difference_result.has_difference) or self.cxt.shows_all_lines: + + row_number_length = max(len(str(lhs_fact.lhs_row_number)), len(str(rhs_fact.rhs_row_number))) + + print(self.template.both_description_heading(value_difference_result)) + print(self.template.both_description_lhs(lhs_fact, row_number_length)) + print(self.template.both_description_rhs(rhs_fact, row_number_length)) + + def report_case_of_existed_only_on_rhs(self, rhs_fact): + + if self.cxt.shows_details: + print(self.template.rhs_only_description(rhs_fact)) + + +class CountReporter: + + class Counter: + + def __init__(self): + + self.number_of_same_lines = 0 + self.number_of_lhs_only = 0 + self.number_of_rhs_only = 0 + self.number_of_differences = 0 + + self.row_numbers_for_lhs_only = [] + self.row_numbers_for_rhs_only = [] + self.row_numbers_for_differences = {} + + self._max_digit = None + + def _increment_same_lines(self): + self.number_of_same_lines += 1 + + def _increment_lhs_only(self): + self.number_of_lhs_only += 1 + + def _increment_rhs_only(self): + self.number_of_rhs_only += 1 + + def _increment_differences(self): + self.number_of_differences += 1 + + def _add_row_number_for_lhs_only(self, row_number): + self.row_numbers_for_lhs_only.append(row_number) + + def _add_row_number_for_rhs_only(self, row_number): + self.row_numbers_for_rhs_only.append(row_number) + + def _add_row_number_for_differences(self, lhs_row_number, rhs_row_number): + self.row_numbers_for_differences[lhs_row_number] = rhs_row_number + + + def count_for_case_of_existed_only_on_lhs(self, row_number): + self._increment_lhs_only() + self._add_row_number_for_lhs_only(row_number) + + def count_for_case_of_existed_on_both_sides(self, lhs_fact, 
rhs_fact, value_difference_result): + + if value_difference_result.has_difference: + self._increment_differences() + self._add_row_number_for_differences(lhs_fact.lhs_row_number, rhs_fact.rhs_row_number) + else: + self._increment_same_lines() + + def count_for_case_of_existed_only_on_rhs(self, row_number): + self._increment_rhs_only() + self._add_row_number_for_rhs_only(row_number) + + @property + def sorted_row_numbers_for_differences(self): + return sorted(self.row_numbers_for_differences.items(), key=lambda x: x[0]) + + + @property + def max_digit(self): + + if self._max_digit is not None: + return self._max_digit + + self._max_digit = max( + len(str(self.number_of_same_lines)), + len(str(self.number_of_lhs_only)), + len(str(self.number_of_rhs_only)), + len(str(self.number_of_differences)), + ) + return self._max_digit + + + def __init__(self, shows_count): + self.shows_count = shows_count + self.counter = self.Counter() + + + def _func_of_right_justified_number(self): + return lambda number: str(number).rjust(self.counter.max_digit) + + @spacing_before(1) + def report_count(self): + + if not self.shows_count: + return + + print('くろまる Count & Row number') + + rjust = self._func_of_right_justified_number() + print('same lines : {}'.format(rjust(self.counter.number_of_same_lines))) + print('left side only ({}): {} :-- Row Numbers -->: {}'.format(Mark.LHS_ONLY, rjust(self.counter.number_of_lhs_only), self.counter.row_numbers_for_lhs_only)) + print('right side only ({}): {} :-- Row Numbers -->: {}'.format(Mark.RHS_ONLY, rjust(self.counter.number_of_rhs_only), self.counter.row_numbers_for_rhs_only)) + print('with differences ({}): {} :-- Row Number Pairs -->: {}'.format(Mark.HAS_DIFF, rjust(self.counter.number_of_differences), self.counter.sorted_row_numbers_for_differences)) + + +class UnicodeSupport: + + @classmethod + def left_justified(cls, value, length): + return f"{value}{' ' * (length - cls.string_length_considering_east_asian_characters_of(value))}" + + 
class CsvDialectFixer:
    """Chooses the csv dialect for one input file, by sniffing or from the context's settings."""

    # (context attribute prefix, csv dialect attribute) pairs. The context
    # attribute name is the prefix followed by the file arrangement suffix.
    _CONTEXT_DIALECT_PAIRS = (
        ("column_separator", "delimiter"),
        ("line_separator", "lineterminator"),
        ("quote_char", "quotechar"),
        ("skips_space_after_column_separator", "skipinitialspace"),
    )

    def __init__(self):
        pass

    @classmethod
    def fixed_dialect(cls, context, csv_file, file_arrangement):
        """Return a (dialect, context) pair for the given file.

        Sniffs the file unless the context forces the individually specified settings.
        """
        if not context.forces_individual_specs:
            return cls._try_sniffing(context, csv_file, file_arrangement)

        return cls._dialect_from_context(context, file_arrangement)

    @classmethod
    def _dialect_from_context(cls, context, file_arrangement):
        """Build an excel-based dialect from the context's settings for this file."""
        dialect = csv.excel()
        for context_prefix, dialect_attribute in cls._CONTEXT_DIALECT_PAIRS:
            setattr(dialect, dialect_attribute, getattr(context, context_prefix + file_arrangement))

        return dialect, context

    @classmethod
    def _try_sniffing(cls, context, csv_file, file_arrangement):
        """Sniff the dialect, falling back to the context's settings when sniffing fails.

        The file is rewound to its start in either case.
        """
        try:
            outcome = cls._sniff(context, csv_file, file_arrangement)

        except csv.Error as e:

            logger.warning(f'Sniffing failed. Generated a dialect from context instead. [type={type(e)}, args={str(e.args)}, message={traceback.format_exception_only(type(e), e)}]')
            outcome = cls._dialect_from_context(context, file_arrangement)

        finally:
            csv_file.seek(0)

        return outcome

    @classmethod
    def _sniff(cls, context, csv_file, file_arrangement):
        """Sniff dialect and header presence from the first sniffing_size characters."""
        sniffer = csv.Sniffer()
        sample = csv_file.read(context.sniffing_size)

        dialect = sniffer.sniff(sample)
        has_header = sniffer.has_header(sample)

        return dialect, cls._adjust_context_with(dialect, has_header, context, file_arrangement)

    @classmethod
    def _adjust_context_with(cls, dialect, has_header, context, file_arrangement):
        """Copy the sniffed dialect settings into the context and settle first_row_is_header."""
        for context_prefix, dialect_attribute in cls._CONTEXT_DIALECT_PAIRS:
            setattr(context, context_prefix + file_arrangement, getattr(dialect, dialect_attribute))

        # An explicit header setting on the context wins over the sniffed guess.
        if context.header is None:
            context.first_row_is_header = has_header
        elif context.header == 'y':
            context.first_row_is_header = True
        else:
            context.first_row_is_header = False

        return context
class CsvReader:
    """Reads the left-hand and right-hand CSV files one row at a time.

    Every read yields a fact object carrying the row number, the row and the
    row's managed matching key.  While reading, the keys of each file are
    checked to be in non-descending order (and unique when the context asks
    for it); a violation is logged and terminates the process with status 1.
    """

    class State:
        """Reading position of a single CSV file: its reader, row number and last key."""

        def __init__(self, csv_file, dialect, file_name, first_row_is_header):

            self._csv_file = csv_file
            self._dialect = dialect
            self._file_name = file_name
            self._first_row_is_header = first_row_is_header

            self._csv_reader = csv.reader(csv_file, dialect)
            self._row_number = 0
            # '' means "no data key seen yet".
            self._previous_key = ""

        def reset(self):
            """Rewind the file and start over with a fresh reader, row number and key."""

            self._csv_file.seek(0)
            self._csv_reader = csv.reader(self._csv_file, self._dialect)
            self._row_number = 0
            self._previous_key = ""

        def increment_row_number(self):
            """Advance the row number, unless the end of the file has already been reached."""

            if self._previous_key == MatchingKeyCodec.END_of_KEY:
                return

            self._row_number += 1

        def key_changed(self, new_key):
            """Remember new_key as the last key seen; the header row's key is not recorded."""

            if self._is_header():
                return

            self._previous_key = new_key

        def _is_header(self):
            # True only while the first row is being processed and that row is a header.
            return self.row_number == 0 and self._first_row_is_header

        @property
        def csv_reader(self):
            return self._csv_reader

        @property
        def file_name(self):
            return self._file_name

        @property
        def row_number(self):
            return self._row_number

        @property
        def previous_key(self):
            return self._previous_key

    def __init__(self, lhs_csv, rhs_csv, lhs_dialect, rhs_dialect, context):

        show_dialect_for_debugging(lhs_dialect, context, '左CSV', FileArrangement.LHS)
        show_dialect_for_debugging(rhs_dialect, context, '右CSV', FileArrangement.RHS)

        self.lhs_csv_state = CsvReader.State(lhs_csv, lhs_dialect, context.lhs_file_name, context.first_row_is_header)
        self.rhs_csv_state = CsvReader.State(rhs_csv, rhs_dialect, context.rhs_file_name, context.first_row_is_header)
        self.cxt = context

        self.skip_header()

    def skip_header(self):
        """Consume the first row of both files when the context says it is a header."""

        if self.cxt.first_row_is_header:
            _ = self.read_lhs()
            _ = self.read_rhs()

    def reset(self):
        """Rewind both files and skip their headers again."""

        self.lhs_csv_state.reset()
        self.rhs_csv_state.reset()
        self.skip_header()

    def read_lhs(self):
        """Read the next left-hand row and return it as an LhsFact."""

        lhs_row, lhs_key = self._read_csv(self.lhs_csv_state)
        self.lhs_csv_state.increment_row_number()
        return LhsFact(self.lhs_csv_state.row_number, lhs_row, lhs_key)

    def read_rhs(self):
        """Read the next right-hand row and return it as an RhsFact."""

        rhs_row, rhs_key = self._read_csv(self.rhs_csv_state)
        self.rhs_csv_state.increment_row_number()
        return RhsFact(self.rhs_csv_state.row_number, rhs_row, rhs_key)

    def _read_csv(self, csv_state):
        """Return (row, managed key) for the next row of the given file.

        At the end of the file an empty row and MatchingKeyCodec.END_of_KEY
        are returned instead.
        """

        try:
            row = next(csv_state.csv_reader)
        except StopIteration:
            csv_state.key_changed(MatchingKeyCodec.END_of_KEY)
            return [], MatchingKeyCodec.END_of_KEY

        new_key = self.cxt.matching_key_codec.managed_key_for(row)
        # Validate against the previous key before it is overwritten below.
        self._detect_key_violation(new_key, csv_state)

        csv_state.key_changed(new_key)

        return row, new_key

    def _detect_key_violation(self, new_key, csv_state):
        """Log an error and exit with status 1 when keys are unsorted or (if required) duplicated.

        Raises:
            SystemExit: with code 1 on a key order or uniqueness violation.
        """

        # Nothing to compare against until the first data key has been recorded.
        if csv_state.previous_key == '':
            return

        if new_key < csv_state.previous_key:
            logger.error(f'matching keys in {csv_state.file_name} are not sorted.'
                         f' [current_key={MatchingKeyCodec.decode_key(new_key)}, previous_key={MatchingKeyCodec.decode_key(csv_state.previous_key)}, matching-key-indices={self.cxt.matching_key_codec.matching_key_info_list}]'
                         f' If the key is a number without zero padding, specify the max size of the key after colon like -k0:8.')
            # The builtin exit() is injected by the site module for interactive
            # use and may be missing; raising SystemExit works everywhere.
            raise SystemExit(1)

        if self.cxt.key_should_be_unique and new_key == csv_state.previous_key:
            logger.error(f'matching keys in {csv_state.file_name} are not unique.'
                         f' [current_key={MatchingKeyCodec.decode_key(new_key)}, previous_key={MatchingKeyCodec.decode_key(csv_state.previous_key)}, matching-key-indices={self.cxt.matching_key_codec.matching_key_info_list}]')
            raise SystemExit(1)
+def lhs_dir(tmpdir): + lhs = tmpdir.mkdir("left_dir") + return lhs + +@pytest.fixture(scope='function') +def rhs_dir(tmpdir): + rhs = tmpdir.mkdir("right_dir") + return rhs + + diff --git a/tests/data/e2e_04_file_encoding/left_EUC-JP.csv b/tests/data/e2e_04_file_encoding/left_EUC-JP.csv new file mode 100644 index 0000000..8b936c0 --- /dev/null +++ b/tests/data/e2e_04_file_encoding/left_EUC-JP.csv @@ -0,0 +1,8 @@ +"�������إå���1", "�������إå���2", "�������إå���3", "�������إå���4", "�������إå���5" +"1", "�ͣ�-��", "��������", "����", "���ʡ����顦�ǥ�����" +"2", "�ͣ�-��", "��������", "����", "��Ĺ�٥��ѡ���" +"3", "�ͣ�-��", "��������", "����", "��������Į�֥롼���Ǥ��Τä�" +"4", "��i-��", "��������", "�̳�ƻ", "��������ϼ" +"5", "�ͣ�-��", "��������", "����", "���Ÿ��˲���Ǧ��1⁄4" +"6", "�ͣ�-��", "��������", "����", "��ƽ��ê��" +"7", "�ͣ�-��", "��������", "����", "�����ܵ��ԻԾ����躣�����̱�����������������������������������Į" diff --git a/tests/data/e2e_04_file_encoding/left_Shift_JIS.csv b/tests/data/e2e_04_file_encoding/left_Shift_JIS.csv new file mode 100644 index 0000000..8eff720 --- /dev/null +++ b/tests/data/e2e_04_file_encoding/left_Shift_JIS.csv @@ -0,0 +1,8 @@ +"�J�����w�b�_�[1", "�J�����w�b�_�[2", "�J�����w�b�_�[3", "�J�����w�b�_�[4", "�J�����w�b�_�[5" +"1", "�l�P-�P", "�P�O�O�P", "����", "�E�i�E�Z���E�f�B����" +"2", "�l�P-�Q", "�P�O�O�Q", "����", "�����x�A�p�[�g" +"3", "�l�P-�R", "�P�O�O�R", "���l", "�ɐ����ؒ��u���[�X�ł��̂���" +"4", "�li-�S", "�P�O�O�S", "�k�C��", "�r���R�̘[" +"5", "�l�P-�T", "�P�O�O�T", "�O�d", "�O�d���ɉ��s�E�ґ�" +"6", "�l�P-�U", "�P�O�O�U", "�V��", "�����̒I�c" +"7", "�l�P-�V", "�P�O�O�V", "���s", "���s�{���s�s�ソ�捡�o���ʉG�ۓ����������ؖړ������鑊�������O��" diff --git a/tests/data/e2e_04_file_encoding/left_UTF-8.csv b/tests/data/e2e_04_file_encoding/left_UTF-8.csv new file mode 100644 index 0000000..c5629be --- /dev/null +++ b/tests/data/e2e_04_file_encoding/left_UTF-8.csv @@ -0,0 +1,8 @@ +"カラムヘッダー1", "カラムヘッダー2", "カラムヘッダー3", "カラムヘッダー4", "カラムヘッダー5" +"1", "値1−1", 
"1001", "東京", "ウナ・セラ・ディ東京" +"2", "値1−2", "1002", "大阪", "西長堀アパート" +"3", "値1−3", "1003", "横浜", "伊勢佐木町ブルースでも歌って" +"4", "値i−4", "1004", "北海道", "羊蹄山の麓🌱" +"5", "値1−5", "1005", "三重", "三重県伊賀市忍者村" +"6", "値1−6", "1006", "新潟", "星峠の棚田🌙" +"7", "値1−7", "1007", "京都", "京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町" diff --git a/tests/data/e2e_04_file_encoding/right_EUC-JP.csv b/tests/data/e2e_04_file_encoding/right_EUC-JP.csv new file mode 100644 index 0000000..d2cc7e3 --- /dev/null +++ b/tests/data/e2e_04_file_encoding/right_EUC-JP.csv @@ -0,0 +1,8 @@ +"�������إå���1", "�������إå���2", "�������إå���3", "�������إå���4", "�������إå���5" +"1", "�ͣ�-��", "��������", "����", "���ʡ����顦�ǥ�����" +"2", "�ͣ�-��", "��������", "����", "��Ĺ�٥��ѡ���" +"3", "�ͣ�-��", "��������", "����", "��������Į�֥롼���Ǥ��Τä�" +"4", "�ͣ�-��", "��������", "�̳�ƻ", "��������ϼ" +"5", "�ͣ�-��", "�����", "����", "���Ÿ��˲���Ǧ��1⁄4" +"6", "�ͣ�-��", "��������", "����", "��ƽ��ê��" +"7", "�ͣ�-��", "��������", "����", "�����ܵ��ԻԾ����躣�����̱�����������������������������������Į" \ No newline at end of file diff --git a/tests/data/e2e_04_file_encoding/right_Shift_JIS.csv b/tests/data/e2e_04_file_encoding/right_Shift_JIS.csv new file mode 100644 index 0000000..ed4f6c2 --- /dev/null +++ b/tests/data/e2e_04_file_encoding/right_Shift_JIS.csv @@ -0,0 +1,8 @@ +"�J�����w�b�_�[1", "�J�����w�b�_�[2", "�J�����w�b�_�[3", "�J�����w�b�_�[4", "�J�����w�b�_�[5" +"1", "�l�P-�P", "�P�O�O�P", "����", "�E�i�E�Z���E�f�C����" +"2", "�l�P-�Q", "�P�O�O�Q", "����", "�����x�A�p�[�g" +"3", "�l�P-�R", "�P�O�O�R", "���l", "�ɐ����ؒ��u���[�Y�ł��̂���" +"4", "�l�P-�S", "�P�O�O�S", "�k�C��", "�r���R�̘[" +"5", "�l�P-�T", "�P���O�T", "���d", "�O�d���ɉ��s�E�ґ�" +"6", "�l�P-�U", "�P�O�O�U", "�V�G", "�����̒I�c" +"7", "�l�P-�V", "�P�O�O�V", "���s", "���s�{���s�s�ソ�捡�o���ʉG�ۓ����������ؖړ������鑊�������O��" \ No newline at end of file diff --git a/tests/data/e2e_04_file_encoding/right_UTF-8.csv b/tests/data/e2e_04_file_encoding/right_UTF-8.csv new file mode 100644 index 
0000000..7b0eaf7 --- /dev/null +++ b/tests/data/e2e_04_file_encoding/right_UTF-8.csv @@ -0,0 +1,8 @@ +"カラムヘッダー1", "カラムヘッダー2", "カラムヘッダー3", "カラムヘッダー4", "カラムヘッダー5" +"1", "値1−1", "1001", "東京", "ウナ・セラ・デイ東京" +"2", "値1−2", "1002", "大阪", "西長堀アパート" +"3", "値1−3", "1003", "横浜", "伊勢佐木町ブルーズでも歌って" +"4", "値1−4", "1004", "北海道", "羊蹄山の麓🌱" +"5", "値1−5", "1o05", "二重", "三重県伊賀市忍者村" +"6", "値1−6", "1006", "新烏", "星峠の棚田🌟" +"7", "値1−7", "1007", "京都", "京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町" diff --git a/tests/data/e2e_04_line_break/left_crlf.csv b/tests/data/e2e_04_line_break/left_crlf.csv new file mode 100644 index 0000000..37b25c8 --- /dev/null +++ b/tests/data/e2e_04_line_break/left_crlf.csv @@ -0,0 +1,8 @@ +"head1", "head2", "head3", "head4", "head5", "head6" +"1", "value1-2", "key2-2", "1002", "20210921T035902", "value4-2" +"1", "value1-3", "key2-3", "1003", "20210921T035904", "value4-3" +"102", "value1-4", "key2-1", "1004", "20210924T180521", "value4-e" +"1003", "value1-5", "key2-1", "1005", "20210924T180528", "value4-5" +"1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" +"1003", "value1-7", "key2-3", "1007", "20210923T143258", "value4-7" +"1003", "value1-e", "key2-4", "1008", "20210923T143259", "value4-8" diff --git a/tests/data/e2e_04_line_break/left_lf.csv b/tests/data/e2e_04_line_break/left_lf.csv new file mode 100644 index 0000000..d492829 --- /dev/null +++ b/tests/data/e2e_04_line_break/left_lf.csv @@ -0,0 +1,8 @@ +"head1", "head2", "head3", "head4", "head5", "head6" +"1", "value1-2", "key2-2", "1002", "20210921T035902", "value4-2" +"1", "value1-3", "key2-3", "1003", "20210921T035904", "value4-3" +"102", "value1-4", "key2-1", "1004", "20210924T180521", "value4-e" +"1003", "value1-5", "key2-1", "1005", "20210924T180528", "value4-5" +"1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" +"1003", "value1-7", "key2-3", "1007", "20210923T143258", "value4-7" +"1003", "value1-e", "key2-4", "1008", "20210923T143259", "value4-8" diff --git 
a/tests/data/e2e_04_line_break/right_crlf.csv b/tests/data/e2e_04_line_break/right_crlf.csv new file mode 100644 index 0000000..5c46077 --- /dev/null +++ b/tests/data/e2e_04_line_break/right_crlf.csv @@ -0,0 +1,7 @@ +"head1", "head2", "head3", "head4", "head5", "head6" +"1", "value1-1", "key2-1", "1001", "20210921T035901", "value4-1" +"1", "value1-2", "key2-2", "1002", "20210921T035902", "value4-2" +"1", "value1-3", "key2-3", "1003", "20210921T035903", "value4-3" +"102", "value1-4e", "key2-1", "1044", "20210924T180529", "value4-4" +"1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" +"1003", "value1-8", "key2-4", "1008", "20210923T143257", "value4-e" diff --git a/tests/data/e2e_04_line_break/right_lf.csv b/tests/data/e2e_04_line_break/right_lf.csv new file mode 100644 index 0000000..105bedd --- /dev/null +++ b/tests/data/e2e_04_line_break/right_lf.csv @@ -0,0 +1,7 @@ +"head1", "head2", "head3", "head4", "head5", "head6" +"1", "value1-1", "key2-1", "1001", "20210921T035901", "value4-1" +"1", "value1-2", "key2-2", "1002", "20210921T035902", "value4-2" +"1", "value1-3", "key2-3", "1003", "20210921T035903", "value4-3" +"102", "value1-4e", "key2-1", "1044", "20210924T180529", "value4-4" +"1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" +"1003", "value1-8", "key2-4", "1008", "20210923T143257", "value4-e" diff --git a/tests/data/e2e_04_special_char/left.csv b/tests/data/e2e_04_special_char/left.csv new file mode 100644 index 0000000..72d133c --- /dev/null +++ b/tests/data/e2e_04_special_char/left.csv @@ -0,0 +1,12 @@ +"head1", "head2", "head3", "head4", "head5", "head6" +"1", "value1 and +value-2", "key2-2", "1,002", "20210921;035902", "value'4'tab 2" +"1", "value1-3", "key2-3", "1003", "20210921 +T035904", "value""4"" +-3" +"102", "value1-4", "key2-1", "1004", "20210924T180521", "value4-e" +"1003", "value1-5", "key2-1", "1005", "20210924T180528", "value4-5" +"1003", "value1 and +value-6", "key2-2", "1,006", "20210923T143259", "value4tab 
6" +"1003", "value1-7", "key2-3", "1007", "20210923T143258", "value4-7" +"1003", "value1-e", "key2-4", "1008", "20210923T143259", "value4-8" diff --git a/tests/data/e2e_04_special_char/right.csv b/tests/data/e2e_04_special_char/right.csv new file mode 100644 index 0000000..afa3f3a --- /dev/null +++ b/tests/data/e2e_04_special_char/right.csv @@ -0,0 +1,11 @@ +"head1", "head2", "head3", "head4", "head5", "head6" +"1", "value1-1", "key2-1", "1001", "20210921T035901", "value4-1" +"1", "value1 and +value-2", "key2-2", "1,002", "20210921;035902", "value'4'tab 2" +"1", "value1-3", "key2-3", "1003", "20210921 +T035904", "value""4""- +3" +"102", "value1-4e", "key2-1", "1044", "20210924T180529", "value4-4" +"1003", "value1 and +value6", "key2-2", "1006", "20210923;143259", "value4 6" +"1003", "value1-8", "key2-4", "1008", "20210923T143257", "value4-e" diff --git a/tests/test__MatchingKeyCodec.py b/tests/test__MatchingKeyCodec.py new file mode 100644 index 0000000..c5d6821 --- /dev/null +++ b/tests/test__MatchingKeyCodec.py @@ -0,0 +1,53 @@ +import pytest + +from src.csvdiff3.csvdiff import MatchingKeyCodec, MatchingKeyInfo + + +class EncodeExpected: + + def __init__(self, managed_key, indices): + self.managed_key = managed_key + self.indices = indices + + +@pytest.mark.parametrize( + "matching_key_infos, row, expected", + [ + pytest.param([MatchingKeyInfo('0')], ['key1', 'value1', 'value2'], EncodeExpected('..key1..', [0]), id='encode 1 alphabetical key'), + pytest.param([MatchingKeyInfo('0'), MatchingKeyInfo('2')], ['key1', 'value1', 'key2'], EncodeExpected('..key1..key2..', [0, 2]), id='encode 2 alphabetical keys'), + pytest.param([MatchingKeyInfo('0:10')], ['1', 'value1', 'value2'], EncodeExpected('..0000000001..', [0]), id='encode 1 number key without zero padding'), + pytest.param([MatchingKeyInfo('0:10'), MatchingKeyInfo('2:4')], ['123456789', 'value1', '3'], EncodeExpected('..0123456789..0003..', [0, 2]), id='encode 2 number keys without zero padding'), + 
pytest.param([MatchingKeyInfo('0:10'), MatchingKeyInfo('1'), MatchingKeyInfo('4:6')], ['123456789', 'key-2', 'value1', 'value2', '98'], EncodeExpected('..0123456789..key-2..000098..', [0, 1, 4]), id='encode 2 number keys and 1 alphabetical key'), + ], +) +def test_encode(matching_key_infos, row, expected): + + sut = MatchingKeyCodec(matching_key_infos) + assert sut.managed_key_for(row) == expected.managed_key + assert sut.matching_key_indices == expected.indices + + + + +class DecodeExpected: + + def __init__(self, key_indices): + self.key_indices = key_indices + + +@pytest.mark.parametrize( + "managed_key, expected", + [ + pytest.param('..key1..', DecodeExpected(['key1']), id='decode 1 alphabetical key'), + pytest.param('..key1..key2..', DecodeExpected(['key1', 'key2']), id='decode 2 alphabetical keys'), + pytest.param('..0000000001..', DecodeExpected(['0000000001']), id='decode 1 number key (original is without zero padding)'), + pytest.param('..0123456789..0003..', DecodeExpected(['0123456789', '0003']), id='decode 2 number keys (original is without zero padding)'), + pytest.param('..0123456789..key-2..000098..', DecodeExpected(['0123456789', 'key-2', '000098']), id='decode 2 number keys and 1 alphabetical key'), + ], +) +def test_decode(managed_key, expected): + + assert MatchingKeyCodec.decode_key(managed_key) == expected.key_indices + + + diff --git a/tests/test__MatchingKeyInfo.py b/tests/test__MatchingKeyInfo.py new file mode 100644 index 0000000..c3dfee9 --- /dev/null +++ b/tests/test__MatchingKeyInfo.py @@ -0,0 +1,39 @@ +import pytest + +from src.csvdiff3.csvdiff import MatchingKeyInfo + + +class Expected: + + def __init__(self, index, max_length): + self.index = index + self.max_length = max_length + +class RowAndExpectedManagedKey: + + def __init__(self, row, expected_managed_key): + self.row = row + self.expected_managed_key = expected_managed_key + + +@pytest.mark.parametrize( + "specified_matching_key, expected, row_and_expected_managed_key_pair", + 
[ + pytest.param('0', Expected(index=0, max_length=0), RowAndExpectedManagedKey(['key1', 'value1'], expected_managed_key='key1'), id='index only case-1'), + pytest.param('2', Expected(index=2, max_length=0), RowAndExpectedManagedKey(['key1', 'value1', 'key2'], expected_managed_key='key2'), id='index only case-2'), + pytest.param('0:9', Expected(index=0, max_length=9), RowAndExpectedManagedKey(['1', 'value1'], expected_managed_key='000000001'), id='index and max length with 1 digit'), + pytest.param('0:9', Expected(index=0, max_length=9), RowAndExpectedManagedKey(['123456789', 'value1'], expected_managed_key='123456789'), id='index and max length with full digits'), + pytest.param('0:9', Expected(index=0, max_length=9), RowAndExpectedManagedKey(['12345678', 'value1'], expected_managed_key='012345678'), id='index and max length with full digits - 1'), + pytest.param('0:9', Expected(index=0, max_length=9), RowAndExpectedManagedKey(['abc', 'value1'], expected_managed_key='000000abc'), id='index and max length with alphabet'), + ], +) +def test_matching_key_info(specified_matching_key, expected, row_and_expected_managed_key_pair): + + sut = MatchingKeyInfo(specified_matching_key) + + assert sut.index == expected.index + assert sut.max_length == expected.max_length + assert sut.key_for(row_and_expected_managed_key_pair.row) == row_and_expected_managed_key_pair.expected_managed_key + + + diff --git a/tests/test__ValueDifferenceDetector.py b/tests/test__ValueDifferenceDetector.py new file mode 100644 index 0000000..098f2ac --- /dev/null +++ b/tests/test__ValueDifferenceDetector.py @@ -0,0 +1,67 @@ +import pytest + +from src.csvdiff3.csvdiff import ValueDifferenceDetector + + +class Condition: + + def __init__(self, cols, keys, ignores, lhs, rhs): + self.cols = cols + self.keys = keys + self.ignores = ignores + self.lhs = lhs + self.rhs = rhs + +class Expected: + + def __init__(self, has_difference, different_column_indices): + self.has_difference = has_difference + 
self.different_column_indices = different_column_indices + + +@pytest.mark.parametrize( + "condition, expected", + [ + pytest.param(Condition(cols=5, keys=[0], ignores=[], + lhs=['1', 'value-1', 'value-2', 'value-3', 'value-4'], + rhs=['1', 'value-1', 'value-2', 'value-3', 'value-4']), Expected(has_difference=False, different_column_indices=[]), id='no difference : 1 numerical key'), + pytest.param(Condition(cols=5, keys=[4], ignores=[], + lhs=['value-1', 'value-2', 'value-3', 'value-4', 'key-1'], + rhs=['value-1', 'value-2', 'value-3', 'value-4', 'key-1']), Expected(has_difference=False, different_column_indices=[]), id='no difference : 1 alphabetical key'), + pytest.param(Condition(cols=5, keys=[0, 3], ignores=[], + lhs=['key-1', 'value-1', 'value-2', '7', 'value-3'], + rhs=['key-1', 'value-1', 'value-2', '7', 'value-3']), Expected(has_difference=False, different_column_indices=[]), id='no difference : multiple keys'), + pytest.param(Condition(cols=5, keys=[4, 0], ignores=[], + lhs=['key-1', 'value-1', 'value-2', 'value-3', '7'], + rhs=['key-1', 'value-1', 'value-2', 'value-3', '7']), Expected(has_difference=False, different_column_indices=[]), id='no difference : multiple keys in reverse order'), + pytest.param(Condition(cols=5, keys=[1], ignores=[], + lhs=['value-1', '1', 'value-2', 'value-3', 'value-4'], + rhs=['value-2', '1', 'value-2', 'value-3', 'value-4']), Expected(has_difference=True, different_column_indices=[0]), id='1 difference : at first'), + pytest.param(Condition(cols=5, keys=[1], ignores=[], + lhs=['value-1', '1', 'value-2', 'value-3', 'value-5'], + rhs=['value-1', '1', 'value-2', 'value-3', 'value-4']), Expected(has_difference=True, different_column_indices=[4]), id='1 difference : at last'), + pytest.param(Condition(cols=5, keys=[1], ignores=[], + lhs=['value-1', '1', 'value-2', 'value-3', 'value-5'], + rhs=['value-0', '1', 'value-3', 'value-2', 'value-4']), Expected(has_difference=True, different_column_indices=[0, 2, 3, 4]), id='multi 
differences : all columns'), + pytest.param(Condition(cols=5, keys=[1], ignores=[0], + lhs=['value-1', '1', 'value-2', 'value-3', 'value-5'], + rhs=['value-0', '1', 'value-3', 'value-2', 'value-4']), Expected(has_difference=True, different_column_indices=[2, 3, 4]), id='multi differences : with first column ignored'), + pytest.param(Condition(cols=5, keys=[1], ignores=[4], + lhs=['value-1', '1', 'value-2', 'value-3', 'value-5'], + rhs=['value-0', '1', 'value-3', 'value-2', 'value-4']), Expected(has_difference=True, different_column_indices=[0, 2, 3]), id='multi differences : with last column ignored'), + pytest.param(Condition(cols=5, keys=[1], ignores=[2, 4], + lhs=['value-1', '1', 'value-2', 'value-3', 'value-5'], + rhs=['value-0', '1', 'value-3', 'value-2', 'value-4']), Expected(has_difference=True, different_column_indices=[0, 3]), id='multi differences : with multi columns ignored'), + ], +) +def test_value_difference_detector(condition, expected): + + sut = ValueDifferenceDetector(number_of_columns=condition.cols , matching_key_indices=condition.keys, ignore_column_indices=condition.ignores) + + actual = sut.detect_difference_between(condition.lhs, condition.rhs) + + assert actual.has_difference == expected.has_difference + assert actual.different_column_indices == expected.different_column_indices + + + diff --git a/tests/test_e2e_01_0Lines.py b/tests/test_e2e_01_0Lines.py new file mode 100644 index 0000000..6905742 --- /dev/null +++ b/tests/test_e2e_01_0Lines.py @@ -0,0 +1,67 @@ +import sys +import textwrap + +import pytest + +from src.csvdiff3 import csvdiff + + +@pytest.mark.filterwarnings("ignore:Sniffing failed") +def test_no_lines_on_both_sides_as_no_header(lhs, rhs, capfd, args): + + lhs.write(textwrap.dedent(''' + ''').strip()) + rhs.write(textwrap.dedent(''' + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-dc'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + 
============ Report ============ + + ● Differences + ------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------- + + ● Count & Row number + same lines : 0 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 0 :-- Row Number Pairs -->: [] + ''') + +def test_no_lines_on_both_sides_with_header(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-dc'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + ------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------- + + くろまる Count & Row number + same lines : 0 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 0 :-- Row Number Pairs -->: [] + ''') + + + diff --git a/tests/test_e2e_02_1Lines.py b/tests/test_e2e_02_1Lines.py new file mode 100644 index 0000000..ee5e760 --- /dev/null +++ b/tests/test_e2e_02_1Lines.py @@ -0,0 +1,192 @@ +import sys +import textwrap + +import pytest + +from src.csvdiff3 import csvdiff + + +def test_1_lines_on_both_sides_as_no_header(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + key-1, value-1, value-2, value-3, value-4 + ''').strip()) + rhs.write(textwrap.dedent(''' + key-1, value-1, value-2, value-3, value-4 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-dc', '-Hn'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + 
----------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ----------------------------------------------------------------------------------------------------------------------------------------------------- + + くろまる Count & Row number + same lines : 1 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 0 :-- Row Number Pairs -->: [] + ''') + +@pytest.mark.filterwarnings("ignore:Sniffing failed") +def test_1_lines_on_the_left_side_only_as_no_header(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + key-1, value-1, value-2, value-3, value-4 + ''').strip()) + rhs.write(textwrap.dedent(''' + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-dc', '-Hn'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + ------------------------------------------------------------------------------------------------ + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------ + 1 ['key-1', 'value-1', 'value-2', 'value-3', 'value-4'] < + + ● Count & Row number + same lines : 0 + left side only (<): 1 :-- Row Numbers -->: [1] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 0 :-- Row Number Pairs -->: [] + ''') + +@pytest.mark.filterwarnings("ignore:Sniffing failed") +def test_1_lines_on_the_right_side_only_as_no_header(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + ''').strip()) + rhs.write(textwrap.dedent(''' + key-1, value-1, value-2, value-3, value-4 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-dc', '-Hn'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == 
textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + ------------------------------------------------------------------------------------------------ + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------ +> 1 ['key-1', 'value-1', 'value-2', 'value-3', 'value-4'] + + くろまる Count & Row number + same lines : 0 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 1 :-- Row Numbers -->: [1] + with differences (!): 0 :-- Row Number Pairs -->: [] + ''') + +def test_1_lines_on_both_sides_with_header(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5 + key-1, value-1, value-2, value-3, value-4 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5 + key-1, value-1, value-2, value-3, value-4 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-dc', '-Hy'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + ----------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ----------------------------------------------------------------------------------------------------------------------------------------------------- + + くろまる Count & Row number + same lines : 1 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 0 :-- Row Number Pairs -->: [] + ''') + +@pytest.mark.filterwarnings("ignore:Sniffing failed") +def test_1_lines_on_the_left_side_only_with_header(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5 + key-1, value-1, value-2, value-3, value-4 + ''').strip()) + rhs.write(textwrap.dedent(''' + 
''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-dc', '-Hy'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + ------------------------------------------------------------------------------------------------ + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------ + 2 ['key-1', 'value-1', 'value-2', 'value-3', 'value-4'] < + + ● Count & Row number + same lines : 0 + left side only (<): 1 :-- Row Numbers -->: [2] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 0 :-- Row Number Pairs -->: [] + ''') + +@pytest.mark.filterwarnings("ignore:Sniffing failed") +def test_1_lines_on_the_right_side_only_with_header(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5 + key-1, value-1, value-2, value-3, value-4 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-dc', '-Hy'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + ------------------------------------------------------------------------------------------------ + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------ +> 2 ['key-1', 'value-1', 'value-2', 'value-3', 'value-4'] + + くろまる Count & Row number + same lines : 0 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 0 :-- Row Number Pairs -->: [] + ''') + + + diff --git a/tests/test_e2e_03_Reporting_count.py b/tests/test_e2e_03_Reporting_count.py new file mode 100644 index 0000000..1cc7143 --- /dev/null +++ 
b/tests/test_e2e_03_Reporting_count.py @@ -0,0 +1,45 @@ +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +def test_show_number_of_cases(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + ● Count & Row number + same lines : 2 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 3 :-- Row Number Pairs -->: [(3, 4), (4, 5), (8, 7)] + ''') + + + diff --git a/tests/test_e2e_03_Reporting_horizontal.py b/tests/test_e2e_03_Reporting_horizontal.py new file mode 100644 index 0000000..5165791 --- /dev/null +++ b/tests/test_e2e_03_Reporting_horizontal.py @@ -0,0 +1,251 @@ +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +def test_show_difference(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, 
value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-d'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] ! 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] @ [4] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 
5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [1, 3, 4, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [1, 4, 5] + + ''') + + +def test_show_difference_and_number_of_cases(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-dc'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + ● Differences + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] ! 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] @ [4] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [1, 3, 4, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [1, 4, 5] + + ● Count & Row number + same lines : 2 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 3 :-- Row Number Pairs -->: [(3, 4), (4, 5), (8, 7)] + ''') + +def test_show_all_and_number_of_cases(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 
20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] ! 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] @ [4] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [1, 3, 4, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [1, 4, 5] + + ● Count & Row number + same lines : 2 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 3 :-- Row Number Pairs -->: [(3, 4), (4, 5), (8, 7)] + ''') + +def test_show_all_and_number_of_cases_with_ignore_column(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', 
'1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] ! 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] @ [4] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 4, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [4, 5] + + ● Count & Row number + same lines : 2 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 3 :-- Row Number Pairs -->: [(3, 4), (4, 5), (8, 7)] + ''') + + +def test_show_all_and_number_of_cases_with_ignore_columns(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, 
key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + + + diff --git a/tests/test_e2e_03_Reporting_vertical.py b/tests/test_e2e_03_Reporting_vertical.py new file mode 100644 index 0000000..0e6843c --- /dev/null +++ b/tests/test_e2e_03_Reporting_vertical.py @@ -0,0 +1,298 @@ +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +def test_show_difference(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-dv'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + -------------------------------------------------------------------------------- + L left.csv + R right.csv + -------------------------------------------------------------------------------- +> R 2 ['1', 
'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + ! @ [4] + L 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] + R 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + ! @ [1, 3, 4, 5] + L 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] + R 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] + < L 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] + < L 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] + ! @ [1, 4, 5] + L 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] + R 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] + + ''') + + +def test_show_difference_and_number_of_cases(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-dvc'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + ● Differences + 
-------------------------------------------------------------------------------- + L left.csv + R right.csv + -------------------------------------------------------------------------------- +> R 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + ! @ [4] + L 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] + R 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + ! @ [1, 3, 4, 5] + L 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] + R 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] + < L 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] + < L 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] + ! @ [1, 4, 5] + L 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] + R 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] + + ● Count & Row number + same lines : 2 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 3 :-- Row Number Pairs -->: [(3, 4), (4, 5), (8, 7)] + ''') + +def test_show_all_and_number_of_cases(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, 
value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-avc'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + -------------------------------------------------------------------------------- + L left.csv + R right.csv + -------------------------------------------------------------------------------- +> R 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + = + L 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + R 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + ! @ [4] + L 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] + R 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + ! @ [1, 3, 4, 5] + L 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] + R 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] + < L 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] + = + L 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + R 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + < L 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] + ! 
@ [1, 4, 5] + L 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] + R 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] + + ● Count & Row number + same lines : 2 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 3 :-- Row Number Pairs -->: [(3, 4), (4, 5), (8, 7)] + ''') + +def test_show_all_and_number_of_cases_with_ignore_column(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-avc', '-i1'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + -------------------------------------------------------------------------------- + L left.csv + R right.csv + -------------------------------------------------------------------------------- +> R 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + = + L 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + R 3 ['1', 'value1-2', 
'key2-2', '1002', '20210921T035902', 'value4-2'] + ! @ [4] + L 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] + R 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + ! @ [3, 4, 5] + L 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] + R 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] + < L 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] + = + L 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + R 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + < L 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] + ! @ [4, 5] + L 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] + R 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] + + ● Count & Row number + same lines : 2 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 3 :-- Row Number Pairs -->: [(3, 4), (4, 5), (8, 7)] + ''') + + +def test_show_all_and_number_of_cases_with_ignore_columns(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 1, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 1, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 1, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, 
value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-avc', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + -------------------------------------------------------------------------------- + L left.csv + R right.csv + -------------------------------------------------------------------------------- +> R 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + = + L 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + R 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + = + L 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] + R 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + ! @ [3, 5] + L 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] + R 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] + < L 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] + = + L 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + R 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + < L 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] + ! 
@ [5] + L 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] + R 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + + + diff --git a/tests/test_e2e_04_CSV_column_separator.py b/tests/test_e2e_04_CSV_column_separator.py new file mode 100644 index 0000000..22a335d --- /dev/null +++ b/tests/test_e2e_04_CSV_column_separator.py @@ -0,0 +1,258 @@ +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +def test_both_csv_tab_separated(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + "head1" "head2" "head3" "head4" "head5" "head6" + "1" "value1-2" "key2-2" "1002" "20210921T035902" "value4-2" + "1" "value1-3" "key2-3" "1003" "20210921T035904" "value4-3" + "102" "value1-4" "key2-1" "1004" "20210924T180521" "value4-e" + "1003" "value1-5" "key2-1" "1005" "20210924T180528" "value4-5" + "1003" "value1-6" "key2-2" "1006" "20210923T143259" "value4-6" + "1003" "value1-7" "key2-3" "1007" "20210923T143258" "value4-7" + "1003" "value1-e" "key2-4" "1008" "20210923T143259" "value4-8" + ''').strip()) + rhs.write(textwrap.dedent(''' + "head1" "head2" "head3" "head4" "head5" "head6" + "1" "value1-1" "key2-1" "1001" "20210921T035901" "value4-1" + "1" "value1-2" "key2-2" "1002" "20210921T035902" "value4-2" + "1" "value1-3" "key2-3" "1003" "20210921T035903" "value4-3" + "102" "value1-4e" "key2-1" "1044" "20210924T180529" "value4-4" + "1003" "value1-6" "key2-2" "1006" "20210923T143259" "value4-6" + "1003" "value1-8" "key2-4" "1008" "20210923T143257" "value4-e" + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + +def test_left_tab_right_comma_separated(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + "head1" "head2" "head3" "head4" "head5" "head6" + "1" "value1-2" "key2-2" "1002" "20210921T035902" "value4-2" + "1" "value1-3" "key2-3" "1003" "20210921T035904" "value4-3" + "102" "value1-4" "key2-1" "1004" "20210924T180521" "value4-e" + "1003" "value1-5" "key2-1" "1005" "20210924T180528" "value4-5" + "1003" "value1-6" "key2-2" "1006" "20210923T143259" "value4-6" + "1003" "value1-7" "key2-3" "1007" "20210923T143258" "value4-7" + "1003" "value1-e" "key2-4" "1008" "20210923T143259" "value4-8" + ''').strip()) + rhs.write(textwrap.dedent(''' + 'head1', 'head2', 'head3', 'head4', 'head5', 'head6' + '1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1' + '1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2' + '1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3' + '102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4' + '1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6' + '1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e' + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + +def test_left_comma_right_tab_separated(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, "head2", head3, "head4", head5, "head6" + "1", value1-2, "key2-2", 1002, "20210921T035902", value4-2 + 1, "value1-3", key2-3, "1003", 20210921T035904, "value4-3" + "102", value1-4, "key2-1", 1004, "20210924T180521", value4-e + 1003, "value1-5", key2-1, "1005", 20210924T180528, "value4-5" + "1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" + "1003", "value1-7", "key2-3", "1007", "20210923T143258", "value4-7" + 1003, "value1-e", key2-4, "1008", 20210923T143259, "value4-8" + ''').strip()) + rhs.write(textwrap.dedent(''' + 'head1' head2 'head3' head4 'head5' head6 + 1 'value1-1' key2-1 '1001' 20210921T035901 'value4-1' + '1' 'value1-2' 'key2-2' '1002' '20210921T035902' 'value4-2' + '1' value1-3 'key2-3' 1003 '20210921T035903' value4-3 + '102' 'value1-4e' 'key2-1' '1044' '20210924T180529' 'value4-4' + 1003 'value1-6' key2-2 '1006' 20210923T143259 'value4-6' + '1003' 'value1-8' 'key2-4' '1008' '20210923T143257' 'value4-e' + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + +def test_left_comma_right_semicolon_separated(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, "head2", head3, "head4", head5, "head6" + "1", value1-2, "key2-2", 1002, "20210921T035902", value4-2 + 1, "value1-3", key2-3, "1003", 20210921T035904, "value4-3" + "102", value1-4, "key2-1", 1004, "20210924T180521", value4-e + 1003, "value1-5", key2-1, "1005", 20210924T180528, "value4-5" + "1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" + "1003", "value1-7", "key2-3", "1007", "20210923T143258", "value4-7" + 1003, "value1-e", key2-4, "1008", 20210923T143259, "value4-8" + ''').strip()) + rhs.write(textwrap.dedent(''' + 'head1';head2;'head3';head4;'head5';head6 + 1;'value1-1';key2-1;'1001';20210921T035901;'value4-1' + '1';'value1-2';'key2-2';'1002';'20210921T035902';'value4-2' + '1';value1-3;'key2-3';1003;'20210921T035903';value4-3 + '102';'value1-4e';'key2-1';'1044';'20210924T180529';'value4-4' + 1003;'value1-6';key2-2;'1006';20210923T143259;'value4-6' + '1003';'value1-8';'key2-4';'1008';'20210923T143257';'value4-e' + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + +def test_left_semicolon_right_tab_separated(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1; "head2"; head3; "head4"; head5; "head6" + "1"; value1-2; "key2-2"; 1002; "20210921T035902"; value4-2 + 1; "value1-3"; key2-3; "1003"; 20210921T035904; "value4-3" + "102"; value1-4; "key2-1"; 1004; "20210924T180521"; value4-e + 1003; "value1-5"; key2-1; "1005"; 20210924T180528; "value4-5" + "1003"; "value1-6"; "key2-2"; "1006"; "20210923T143259"; "value4-6" + "1003"; "value1-7"; "key2-3"; "1007"; "20210923T143258"; "value4-7" + 1003; "value1-e"; key2-4; "1008"; 20210923T143259; "value4-8" + ''').strip()) + rhs.write(textwrap.dedent(''' + 'head1' head2 'head3' head4 'head5' head6 + 1 'value1-1' key2-1 '1001' 20210921T035901 'value4-1' + '1' 'value1-2' 'key2-2' '1002' '20210921T035902' 'value4-2' + '1' value1-3 'key2-3' 1003 '20210921T035903' value4-3 + '102' 'value1-4e' 'key2-1' '1044' '20210924T180529' 'value4-4' + 1003 'value1-6' key2-2 '1006' 20210923T143259 'value4-6' + '1003' 'value1-8' 'key2-4' '1008' '20210923T143257' 'value4-e' + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + + + diff --git a/tests/test_e2e_04_CSV_file_encoding.py b/tests/test_e2e_04_CSV_file_encoding.py new file mode 100644 index 0000000..5025b00 --- /dev/null +++ b/tests/test_e2e_04_CSV_file_encoding.py @@ -0,0 +1,109 @@ +import os.path +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +TEST_DATA_DIR = 'data/e2e_04_file_encoding' + +def test_file_encoding_utf8(path_to_tests_dir, capfd): + + lhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'left_UTF-8.csv') + rhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'right_UTF-8.csv') + + sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-ac'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left_UTF-8.csv right_UTF-8.csv Column indices with difference + ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 2 ['1', '値1−1', '1001', '東京', 'ウナ・セラ・ディ東京'] ! 2 ['1', '値1−1', '1001', '東京', 'ウナ・セラ・デイ東京'] @ [4] + 3 ['2', '値1−2', '1002', '大阪', '西長堀アパート'] 3 ['2', '値1−2', '1002', '大阪', '西長堀アパート'] + 4 ['3', '値1−3', '1003', '横浜', '伊勢佐木町ブルースでも歌って'] ! 4 ['3', '値1−3', '1003', '横浜', '伊勢佐木町ブルーズでも歌って'] @ [4] + 5 ['4', '値i−4', '1004', '北海道', '羊蹄山の麓🌱'] ! 
5 ['4', '値1−4', '1004', '北海道', '羊蹄山の麓🌱'] @ [1] + 6 ['5', '値1−5', '1005', '三重', '三重県伊賀市忍者村'] ! 6 ['5', '値1−5', '1o05', '二重', '三重県伊賀市忍者村'] @ [2, 3] + 7 ['6', '値1−6', '1006', '新潟', '星峠の棚田🌙'] ! 7 ['6', '値1−6', '1006', '新烏', '星峠の棚田🌟'] @ [3, 4] + 8 ['7', '値1−7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] 8 ['7', '値1−7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] + + くろまる Count & Row number + same lines : 2 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 5 :-- Row Number Pairs -->: [(2, 2), (4, 4), (5, 5), (6, 6), (7, 7)] + ''') + + +def test_file_encoding_shift_jis(path_to_tests_dir, capfd): + + lhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'left_Shift_JIS.csv') + rhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'right_Shift_JIS.csv') + + sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-ac', '-e Shift_JIS'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left_Shift_JIS.csv right_Shift_JIS.csv Column indices with difference + ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 2 ['1', '値1-1', '1001', '東京', 'ウナ・セラ・ディ東京'] ! 2 ['1', '値1-1', '1001', '東京', 'ウナ・セラ・デイ東京'] @ [4] + 3 ['2', '値1-2', '1002', '大阪', '西長堀アパート'] 3 ['2', '値1-2', '1002', '大阪', '西長堀アパート'] + 4 ['3', '値1-3', '1003', '横浜', '伊勢佐木町ブルースでも歌って'] ! 4 ['3', '値1-3', '1003', '横浜', '伊勢佐木町ブルーズでも歌って'] @ [4] + 5 ['4', '値i-4', '1004', '北海道', '羊蹄山の麓'] ! 
5 ['4', '値1-4', '1004', '北海道', '羊蹄山の麓'] @ [1] + 6 ['5', '値1-5', '1005', '三重', '三重県伊賀市忍者村'] ! 6 ['5', '値1-5', '1o05', '二重', '三重県伊賀市忍者村'] @ [2, 3] + 7 ['6', '値1-6', '1006', '新潟', '星峠の棚田'] ! 7 ['6', '値1-6', '1006', '新烏', '星峠の棚田'] @ [3] + 8 ['7', '値1-7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] 8 ['7', '値1-7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] + + くろまる Count & Row number + same lines : 2 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 5 :-- Row Number Pairs -->: [(2, 2), (4, 4), (5, 5), (6, 6), (7, 7)] + ''') + + +def test_file_encoding_euc_jp(path_to_tests_dir, capfd): + + lhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'left_EUC-JP.csv') + rhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'right_EUC-JP.csv') + + sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-ac', '-e EUC-JP'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left_EUC-JP.csv right_EUC-JP.csv Column indices with difference + ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 2 ['1', '値1-1', '1001', '東京', 'ウナ・セラ・ディ東京'] ! 2 ['1', '値1-1', '1001', '東京', 'ウナ・セラ・デイ東京'] @ [4] + 3 ['2', '値1-2', '1002', '大阪', '西長堀アパート'] 3 ['2', '値1-2', '1002', '大阪', '西長堀アパート'] + 4 ['3', '値1-3', '1003', '横浜', '伊勢佐木町ブルースでも歌って'] ! 4 ['3', '値1-3', '1003', '横浜', '伊勢佐木町ブルーズでも歌って'] @ [4] + 5 ['4', '値i-4', '1004', '北海道', '羊蹄山の麓'] ! 
5 ['4', '値1-4', '1004', '北海道', '羊蹄山の麓'] @ [1] + 6 ['5', '値1-5', '1005', '三重', '三重県伊賀市忍者村'] ! 6 ['5', '値1-5', '1o05', '二重', '三重県伊賀市忍者村'] @ [2, 3] + 7 ['6', '値1-6', '1006', '新潟', '星峠の棚田'] ! 7 ['6', '値1-6', '1006', '新烏', '星峠の棚田'] @ [3] + 8 ['7', '値1-7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] 8 ['7', '値1-7', '1007', '京都', '京都府京都市上京区今出川通烏丸東入上る二筋目東入下る相国寺門前町'] + + くろまる Count & Row number + same lines : 2 + left side only (<): 0 :-- Row Numbers -->: [] + right side only (>): 0 :-- Row Numbers -->: [] + with differences (!): 5 :-- Row Number Pairs -->: [(2, 2), (4, 4), (5, 5), (6, 6), (7, 7)] + ''') + + + diff --git a/tests/test_e2e_04_CSV_line_break.py b/tests/test_e2e_04_CSV_line_break.py new file mode 100644 index 0000000..1063970 --- /dev/null +++ b/tests/test_e2e_04_CSV_line_break.py @@ -0,0 +1,78 @@ +import os.path +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +TEST_DATA_DIR = 'data/e2e_04_line_break' + +def test_line_break_lf(path_to_tests_dir, capfd): + + lhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'left_lf.csv') + rhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'right_lf.csv') + + sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left_lf.csv right_lf.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', 
'20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + + +def test_line_break_crlf(path_to_tests_dir, capfd): + + lhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'left_crlf.csv') + rhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'right_crlf.csv') + + sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left_crlf.csv right_crlf.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', 
'20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + + + diff --git a/tests/test_e2e_04_CSV_quotation_mark.py b/tests/test_e2e_04_CSV_quotation_mark.py new file mode 100644 index 0000000..b65543b --- /dev/null +++ b/tests/test_e2e_04_CSV_quotation_mark.py @@ -0,0 +1,158 @@ +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +def test_double_quotation_mark_all(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + "head1", "head2", "head3", "head4", "head5", "head6" + "1", "value1-2", "key2-2", "1002", "20210921T035902", "value4-2" + "1", "value1-3", "key2-3", "1003", "20210921T035904", "value4-3" + "102", "value1-4", "key2-1", "1004", "20210924T180521", "value4-e" + "1003", "value1-5", "key2-1", "1005", "20210924T180528", "value4-5" + "1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" + "1003", "value1-7", "key2-3", "1007", "20210923T143258", "value4-7" + "1003", "value1-e", "key2-4", "1008", "20210923T143259", "value4-8" + 
''').strip()) + rhs.write(textwrap.dedent(''' + "head1", "head2", "head3", "head4", "head5", "head6" + "1", "value1-1", "key2-1", "1001", "20210921T035901", "value4-1" + "1", "value1-2", "key2-2", "1002", "20210921T035902", "value4-2" + "1", "value1-3", "key2-3", "1003", "20210921T035903", "value4-3" + "102", "value1-4e", "key2-1", "1044", "20210924T180529", "value4-4" + "1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" + "1003", "value1-8", "key2-4", "1008", "20210923T143257", "value4-e" + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 
5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + +def test_single_quotation_mark_all(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + 'head1', 'head2', 'head3', 'head4', 'head5', 'head6' + '1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2' + '1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3' + '102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e' + '1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5' + '1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6' + '1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7' + '1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8' + ''').strip()) + rhs.write(textwrap.dedent(''' + 'head1', 'head2', 'head3', 'head4', 'head5', 'head6' + '1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1' + '1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2' + '1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3' + '102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4' + '1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6' + '1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e' + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + 
csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + +def test_double_partially_and_single_partially(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, "head2", head3, "head4", head5, "head6" + "1", value1-2, "key2-2", 1002, "20210921T035902", value4-2 + 1, "value1-3", key2-3, "1003", 20210921T035904, "value4-3" + "102", value1-4, "key2-1", 1004, "20210924T180521", value4-e + 1003, "value1-5", key2-1, "1005", 20210924T180528, "value4-5" + "1003", "value1-6", "key2-2", "1006", "20210923T143259", "value4-6" + "1003", "value1-7", "key2-3", "1007", "20210923T143258", "value4-7" + 1003, "value1-e", key2-4, "1008", 20210923T143259, "value4-8" + ''').strip()) + rhs.write(textwrap.dedent(''' + 'head1', head2, 'head3', head4, 'head5', head6 + 1, 'value1-1', key2-1, '1001', 20210921T035901, 'value4-1' + '1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2' + '1', value1-3, 'key2-3', 1003, '20210921T035903', value4-3 + '102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4' + 1003, 'value1-6', key2-2, '1006', 20210923T143259, 'value4-6' + '1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e' + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] 3 ['1', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + 3 ['1', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] 4 ['1', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 
7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 3 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 2 :-- Row Number Pairs -->: [(4, 5), (8, 7)] + ''') + + + diff --git a/tests/test_e2e_04_CSV_special_char_in_value.py b/tests/test_e2e_04_CSV_special_char_in_value.py new file mode 100644 index 0000000..91f7854 --- /dev/null +++ b/tests/test_e2e_04_CSV_special_char_in_value.py @@ -0,0 +1,45 @@ +import os.path +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +TEST_DATA_DIR = 'data/e2e_04_special_char' + +def test_special_character_in_value(path_to_tests_dir, capfd): + """ special character: line-break, comma, tab, single-quotation, double-quotation, semicolon """ + + lhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'left.csv') + rhs_csv = os.path.join(path_to_tests_dir, TEST_DATA_DIR, 'right.csv') + + sys.argv = ['csvdiff.py', lhs_csv, rhs_csv, '-k0:4,2', '-ac', '-i1,4'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる All + --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +> 2 ['1', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + 2 ['1', 'value1 and \\nvalue-2', 'key2-2', '1,002', '20210921;035902', "value'4'tab\\t2"] 3 ['1', 'value1 and \\nvalue-2', 'key2-2', '1,002', '20210921;035902', "value'4'tab\\t2"] + 3 ['1', 'value1-3', 'key2-3', '1003', 
'20210921\\nT035904', 'value"4"\\n-3'] ! 4 ['1', 'value1-3', 'key2-3', '1003', '20210921\\nT035904', 'value"4"-\\n3'] @ [5] + 4 ['102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [3, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] < + 6 ['1003', 'value1 and\\nvalue-6', 'key2-2', '1,006', '20210923T143259', 'value4tab\\t6'] ! 6 ['1003', 'value1 and\\nvalue6', 'key2-2', '1006', '20210923;143259', 'value4\\t6'] @ [3, 5] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] ! 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [5] + + ● Count & Row number + same lines : 1 + left side only (<): 2 :-- Row Numbers -->: [5, 7] + right side only (>): 1 :-- Row Numbers -->: [2] + with differences (!): 4 :-- Row Number Pairs -->: [(3, 4), (4, 5), (6, 6), (8, 7)] + ''') + + + diff --git a/tests/test_e2e_05_Option.py b/tests/test_e2e_05_Option.py new file mode 100644 index 0000000..e45e906 --- /dev/null +++ b/tests/test_e2e_05_Option.py @@ -0,0 +1,94 @@ +import sys +import textwrap + +import pytest + +from src.csvdiff3 import csvdiff + + +def test_option_k_not_specified(lhs, rhs, capfd): + """ Run with column 0 as the matching key. 
""" + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 0001, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 0001, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 0102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 0001, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 0001, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 0001, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 0102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-d'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Differences + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + left.csv right.csv Column indices with difference + -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 2 ['0001', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] ! 2 ['0001', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] @ [1, 2, 3, 4, 5] + 3 ['0001', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] ! 
3 ['0001', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] @ [1, 2, 3, 4, 5] +> 4 ['0001', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + 4 ['0102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] ! 5 ['0102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] @ [1, 3, 4, 5] + 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] ! 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] @ [1, 2, 3, 4, 5] + 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] ! 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] @ [1, 2, 3, 4, 5] + 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] < + 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] < + + ''') + +def test_option_u_specified(lhs, rhs, capfd): + """ + Run the matching key as unique. + So if it detects that the matching key is not unique, an error will occur. + (Matching key duplication detection feature) + """ + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 0001, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 0001, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 0102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 0001, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 0001, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 0001, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 0102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = 
['csvdiff.py', lhs.strpath, rhs.strpath, '-du'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + out, err = capfd.readouterr() + assert str(err).find('are not unique.')> 0 + assert out == '' + + + diff --git a/tests/test_e2e_71_for_Developer.py b/tests/test_e2e_71_for_Developer.py new file mode 100644 index 0000000..96992b8 --- /dev/null +++ b/tests/test_e2e_71_for_Developer.py @@ -0,0 +1,154 @@ +import sys +import textwrap + +from src.csvdiff3 import csvdiff + + +def test_option_x_1(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 0001, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 0001, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 0102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 0001, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 0001, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 0001, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 0102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-x'] + csvdiff.main() + + out, err = capfd.readouterr() + assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Context + File Path on the Left-Hand Side: {lhs_file_path} + File Path on the Right-Hand Side : {rhs_file_path} + Matching Key Indices: [MatchingKeyInfo(0, '')] + Matching Key Is Unique?: False + Column Indices to Ignore: [] + with Header?: True + Report Style: Two 
facing (Horizontal) + Show Count?: True + Show Difference Only?: False + Show All?: False + Show Context?: True + File Encoding for Left-Hand Side: utf8 + File Encoding for Right-Hand Side: utf8 + CSV Sniffing Size: 4096 + --- csv analysis conditions --- + Forces Individual Specified Conditions?: False + column_separator_for_lhs: COMMA + column_separator_for_rhs: COMMA + line_separator_for_lhs: 0d0a + line_separator_for_rhs: 0d0a + quote_char_for_lhs: " + quote_char_for_rhs: " + skips_space_after_column_separator_for_lhs: True + skips_space_after_column_separator_for_rhs: True + + くろまる Count & Row number + same lines : 0 + left side only (<): 2 :-- Row Numbers -->: [7, 8] + right side only (>): 1 :-- Row Numbers -->: [4] + with differences (!): 5 :-- Row Number Pairs -->: [(2, 2), (3, 3), (4, 5), (5, 6), (6, 7)] + ''').format(lhs_file_path=lhs.strpath, rhs_file_path=rhs.strpath) + + +def test_option_x_2(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 0001, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 0001, value1-3, key2-3, 1003, 20210921T035904, value4-3 + 0102, value1-4, key2-1, 1004, 20210924T180521, value4-e + 1003, value1-5, key2-1, 1005, 20210924T180528, value4-5 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-7, key2-3, 1007, 20210923T143258, value4-7 + 1003, value1-e, key2-4, 1008, 20210923T143259, value4-8 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5, head6 + 0001, value1-1, key2-1, 1001, 20210921T035901, value4-1 + 0001, value1-2, key2-2, 1002, 20210921T035902, value4-2 + 0001, value1-3, key2-3, 1003, 20210921T035903, value4-3 + 0102, value1-4e, key2-1, 1044, 20210924T180529, value4-4 + 1003, value1-6, key2-2, 1006, 20210923T143259, value4-6 + 1003, value1-8, key2-4, 1008, 20210923T143257, value4-e + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:4,2', '-avx'] + csvdiff.main() + + out, err = capfd.readouterr() 
+ assert err == '' + assert out == textwrap.dedent(''' + ============ Report ============ + + くろまる Context + File Path on the Left-Hand Side: {lhs_file_path} + File Path on the Right-Hand Side : {rhs_file_path} + Matching Key Indices: [MatchingKeyInfo(0, 4), MatchingKeyInfo(2, '')] + Matching Key Is Unique?: False + Column Indices to Ignore: [] + with Header?: True + Report Style: Vertical + Show Count?: False + Show Difference Only?: False + Show All?: True + Show Context?: True + File Encoding for Left-Hand Side: utf8 + File Encoding for Right-Hand Side: utf8 + CSV Sniffing Size: 4096 + --- csv analysis conditions --- + Forces Individual Specified Conditions?: False + column_separator_for_lhs: COMMA + column_separator_for_rhs: COMMA + line_separator_for_lhs: 0d0a + line_separator_for_rhs: 0d0a + quote_char_for_lhs: " + quote_char_for_rhs: " + skips_space_after_column_separator_for_lhs: True + skips_space_after_column_separator_for_rhs: True + + くろまる All + -------------------------------------------------------------------------------- + L left.csv + R right.csv + -------------------------------------------------------------------------------- +> R 2 ['0001', 'value1-1', 'key2-1', '1001', '20210921T035901', 'value4-1'] + = + L 2 ['0001', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + R 3 ['0001', 'value1-2', 'key2-2', '1002', '20210921T035902', 'value4-2'] + ! @ [4] + L 3 ['0001', 'value1-3', 'key2-3', '1003', '20210921T035904', 'value4-3'] + R 4 ['0001', 'value1-3', 'key2-3', '1003', '20210921T035903', 'value4-3'] + ! 
@ [1, 3, 4, 5] + L 4 ['0102', 'value1-4', 'key2-1', '1004', '20210924T180521', 'value4-e'] + R 5 ['0102', 'value1-4e', 'key2-1', '1044', '20210924T180529', 'value4-4'] + < L 5 ['1003', 'value1-5', 'key2-1', '1005', '20210924T180528', 'value4-5'] + = + L 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + R 6 ['1003', 'value1-6', 'key2-2', '1006', '20210923T143259', 'value4-6'] + < L 7 ['1003', 'value1-7', 'key2-3', '1007', '20210923T143258', 'value4-7'] + ! @ [1, 4, 5] + L 8 ['1003', 'value1-e', 'key2-4', '1008', '20210923T143259', 'value4-8'] + R 7 ['1003', 'value1-8', 'key2-4', '1008', '20210923T143257', 'value4-e'] + + ''').format(lhs_file_path=lhs.strpath, rhs_file_path=rhs.strpath) + + + diff --git a/tests/test_e2e_91_Exceptional_file_path.py b/tests/test_e2e_91_Exceptional_file_path.py new file mode 100644 index 0000000..12c5bf9 --- /dev/null +++ b/tests/test_e2e_91_Exceptional_file_path.py @@ -0,0 +1,102 @@ +import sys +import textwrap + +import pytest + +from src.csvdiff3 import csvdiff + + +def test_misspecification_of_csv_file_path_on_the_left(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + ''').strip()) + + sys.argv = ['csvdiff.py', 'not_exists' + lhs.strpath, rhs.strpath, '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find("lhs_file_path not exists. 
")> 0 + +def test_misspecification_of_csv_file_path_on_the_right(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, 'not_exists' + rhs.strpath, '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find("rhs_file_path not exists. ")> 0 + +def test_specified_left_csv_file_path_is_directory(lhs, rhs, lhs_dir, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs_dir.strpath, rhs.strpath, '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find("lhs_file_path is not a file.")> 0 + +def test_specified_right_csv_file_path_is_directory(lhs, rhs, rhs_dir, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs_dir.strpath, '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find("rhs_file_path is not a 
file.")> 0 + + + diff --git a/tests/test_e2e_91_Exceptional_matching_key.py b/tests/test_e2e_91_Exceptional_matching_key.py new file mode 100644 index 0000000..3cb0457 --- /dev/null +++ b/tests/test_e2e_91_Exceptional_matching_key.py @@ -0,0 +1,110 @@ +import sys +import textwrap + +import pytest + +from src.csvdiff3 import csvdiff + + +def test_specified_non_numeric_value_for_matching_key_index(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-ko', '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find('MATCHING_KEY_INDICES should be a number. See also help. [specified index=o]')> 0 + +def test_specified_non_numeric_value_for_matching_key_max_length(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + 1, value1-1, value2-1, value3-1 + 12, value1-2, value2-2, value3-2 + 103, value1-3, value2-3, value3-3 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + 1, value1-1, value2-1, value3-1 + 12, value1-2, value2-2, value3-2 + 103, value1-3, value2-3, value3-3 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:B', '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find('MATCHING_KEY_INDICES should be a number. See also help. 
[specified max_length=B]')> 0 + +def test_specified_out_of_range_index_for_matching_key_index_1(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k4', '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find("one of the indices specified for MATCHING_KEY_INDICES is out of range")> 0 + +def test_specified_out_of_range_index_for_matching_key_index_2(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0,4', '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find("one of the indices specified for MATCHING_KEY_INDICES is out of range")> 0 + + + diff --git a/tests/test_e2e_91_Exceptional_not_sorted.py b/tests/test_e2e_91_Exceptional_not_sorted.py new file mode 100644 index 0000000..9c60674 --- /dev/null +++ b/tests/test_e2e_91_Exceptional_not_sorted.py @@ -0,0 +1,60 @@ +import sys +import textwrap + +import pytest + +from src.csvdiff3 import csvdiff + + +def test_string_matching_key_not_sorted(lhs, rhs, capfd): + + 
lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-3, value1-2, value2-2, value3-2 + key1-2, value1-3, value2-3, value3-3 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find("are not sorted. [current_key=['key1-2'], previous_key=['key1-3']")> 0 + +def test_numerical_matching_key_not_sorted(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + 1, value1-1, value2-1, value3-1 + 103, value1-3, value2-3, value3-3 + 12, value1-2, value2-2, value3-2 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + 1, value1-1, value2-1, value3-1 + 12, value1-2, value2-2, value3-2 + 103, value1-3, value2-3, value3-3 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-k0:3', '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find("are not sorted. 
[current_key=['012'], previous_key=['103']")> 0 + + + diff --git a/tests/test_e2e_91_Exceptional_number_of_columns.py b/tests/test_e2e_91_Exceptional_number_of_columns.py new file mode 100644 index 0000000..e48bebd --- /dev/null +++ b/tests/test_e2e_91_Exceptional_number_of_columns.py @@ -0,0 +1,62 @@ +import sys +import textwrap + +import pytest + +from src.csvdiff3 import csvdiff + + +@pytest.mark.filterwarnings("ignore:Sniffing failed") +def test_different_number_of_columns_for_header_and_body(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4, head5 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find('IndexError')> 0 + +@pytest.mark.filterwarnings("ignore:Sniffing failed") +def test_different_number_of_columns_between_rows_of_body(lhs, rhs, capfd): + + lhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2 + key1-3, value1-3, value2-3, value3-3 + ''').strip()) + rhs.write(textwrap.dedent(''' + head1, head2, head3, head4 + key1-1, value1-1, value2-1, value3-1 + key1-2, value1-2, value2-2, value3-2 + key1-3, value1-3, value2-3 + ''').strip()) + + sys.argv = ['csvdiff.py', lhs.strpath, rhs.strpath, '-d'] + with pytest.raises(SystemExit) as e: + csvdiff.main() + + assert e.type == SystemExit + assert e.value.code == 1 + + _, err = capfd.readouterr() + assert str(err).find('IndexError')> 0 + + +

AltStyle によって変換されたページ (->オリジナル) /