diff --git a/README.md b/README.md index 2899835..301872d 100644 --- a/README.md +++ b/README.md @@ -132,3 +132,13 @@ These queries are examples generated by chatgpt atm and not yet tested or based Feel free to modify or extend the script to suit your specific business needs. +# Some depedencies + +- pandas +- requests +- pyvat +- sqlite3 +- SPARQLWrapper +- os +- openpyxl +- xlrd diff --git a/example-filltable.py b/example-filltable.py new file mode 100644 index 0000000..a2784d3 --- /dev/null +++ b/example-filltable.py @@ -0,0 +1,178 @@ +# Re-running the script due to the environment reset + +import requests +import pandas as pd +import pyvat +import sqlite3 +import pyarrow +from SPARQLWrapper import SPARQLWrapper, JSON +import os +from datetime import date +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + +# Database setup +db_path = 'ttaxt.db' +conn = sqlite3.connect(db_path) +cursor = conn.cursor() + +logging.info("Database connection established") + +# Create necessary tables +# Reverse charge values (as defined in pyvat): +# 1 - Charge VAT +# 2 - Reverse charge +# 3 - No charge +cursor.executescript(''' +CREATE TABLE IF NOT EXISTS countries ( + country_code TEXT PRIMARY KEY, + country_name TEXT, + continent TEXT, + currency TEXT +); + +CREATE TABLE IF NOT EXISTS vat_rules ( + country_code TEXT, + is_registered INTEGER, + vat_rate REAL DEFAULT NULL, + vat_action INTEGER DEFAULT NULL, + PRIMARY KEY (country_code, is_registered) +); +''') + +logging.info("Tables created successfully") + +# Function to fetch country and currency data from Wikidata using SPARQL +def fetch_wikidata_countries_and_currency(): + parquet_file = 'wikidata_countries_currency.parquet' + + if os.path.exists(parquet_file): + # Load data from parquet file if it exists + return pd.read_parquet(parquet_file) + else: + sparql = SPARQLWrapper("https://query.wikidata.org/sparql") + sparql.setQuery(""" + SELECT ?countryLabel ?iso_alpha_2 ?currency_code ?continentLabel WHERE { + ?country wdt:P31 wd:Q6256; # Select countries + wdt:P297 ?iso_alpha_2; # Get ISO 3166-1 alpha-2 codes + wdt:P38 ?currency; # Get currency + wdt:P30 ?continent. # Get continent + ?currency wdt:P498 ?currency_code. # Get ISO 4217 currency code + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } + } + """) + sparql.setReturnFormat(JSON) + results = sparql.query().convert() + + # Process the data + data = [] + for result in results["results"]["bindings"]: + country_name = result["countryLabel"]["value"] + iso_alpha_2 = result["iso_alpha_2"]["value"] + currency_code = result["currency_code"]["value"] + continent = result["continentLabel"]["value"] + data.append((iso_alpha_2, country_name, continent, currency_code)) + logging.info(f"Appended data: {iso_alpha_2}, {country_name}, {continent}, {currency_code}") + + df = pd.DataFrame(data, columns=['Country Code', 'Country Name', 'Continent', 'Currency']) + logging.info(f"Created DataFrame with {len(df)} rows") + + # Save the data to a parquet file + df.to_parquet(parquet_file) + logging.info(f"Saved DataFrame to parquet file: {parquet_file}") + + return df + +# Set initial seller +seller = pyvat.Party('NL', is_business=True) # Assuming the seller is a business in the Netherlands + +# 1. Fetch country, currency, and continent data using SPARQL +logging.info("Fetching country data from Wikidata") +country_data_df = fetch_wikidata_countries_and_currency() +logging.info(f"Fetched {len(country_data_df)} countries from Wikidata") +logging.info(f"First few rows of country data:\n{country_data_df.head()}") + +# 2. Download and cross-check with ISO 4217 XLS +iso_4217_url = "https://www.six-group.com/dam/download/financial-information/data-center/iso-currrency/lists/list-one.xls" +local_file_path = "iso_4217_list_one.xls" + +# Download the file if it doesn't exist +if not os.path.exists(local_file_path): + logging.info(f"Downloading ISO 4217 XLS file from {iso_4217_url}") + response = requests.get(iso_4217_url) + with open(local_file_path, 'wb') as file: + file.write(response.content) + logging.info("ISO 4217 XLS file downloaded successfully") +else: + logging.info("ISO 4217 XLS file already exists") + +# Read the Excel file, starting from row 4 (0-based index, so 3) +iso_4217_xls = pd.read_excel(local_file_path, sheet_name=0, header=3) +iso_4217_valid_codes = iso_4217_xls['Alphabetic Code'].dropna().tolist() +logging.info(f"Found {len(iso_4217_valid_codes)} valid currency codes in ISO 4217 XLS") + +# Filter out defunct currency codes +valid_countries_df = country_data_df[country_data_df['Currency'].isin(iso_4217_valid_codes)] +logging.info(f"Filtered to {len(valid_countries_df)} countries with valid currencies") +logging.info(f"First few rows of valid countries:\n{valid_countries_df.head()}") + +# Populate the countries table with filtered data +logging.info("Populating countries table") +for index, row in valid_countries_df.iterrows(): + cursor.execute(''' + INSERT OR REPLACE INTO countries (country_code, country_name, continent, currency) + VALUES (?, ?, ?, ?) + ''', (row['Country Code'], row['Country Name'], row['Continent'], row['Currency'])) + logging.info(f"Inserted country: {row['Country Name']} ({row['Country Code']}) into the database") + +conn.commit() +logging.info("Countries table populated successfully") + +# 3. Populate VAT rules using pyvat +logging.info("Populating VAT rules") +current_date = date.today() +item_type = pyvat.ItemType.generic_electronic_service + +business_types = [True, False] + +total_countries = len(valid_countries_df) +for business_type in business_types: + logging.info(f"Processing VAT rules for {'business' if business_type else 'non-business'} customers") + for index, row in valid_countries_df.iterrows(): + country_code = row['Country Code'] + country_name = row['Country Name'] + buyer = pyvat.Party(country_code, is_business=business_type) + vat_charge = pyvat.get_sale_vat_charge(current_date, item_type, buyer, seller) + vat_rate = float(vat_charge.rate) if vat_charge.rate is not None else None + is_registered = 1 if business_type else 0 # Convert boolean to integer + vat_action = vat_charge.action.value if vat_charge.action is not None else None + + cursor.execute(''' + INSERT OR REPLACE INTO vat_rules (country_code, is_registered, vat_rate, vat_action) + VALUES (?, ?, ?, ?) + ''', (country_code, is_registered, vat_rate, vat_action)) + + logging.info(f"Processed VAT rules for {country_name} ({country_code}): VAT rate = {vat_rate}, is_registered = {is_registered}, VAT action = {vat_action}") + + conn.commit() + logging.info(f"VAT rules for {'business' if business_type else 'non-business'} customers committed to database") + +# print table +logging.info("Fetching all VAT rules from database") +cursor.execute(''' + SELECT * FROM vat_rules +''') +vat_rules = cursor.fetchall() +logging.info(f"Total VAT rules: {len(vat_rules)}") +logging.info(f"First few VAT rules:\n{vat_rules[:5]}") + +# Commit changes and close the database +conn.commit() +conn.close() +logging.info("Database connection closed") + +# Provide the path to the created SQLite database +logging.info(f"SQLite database created at: {db_path}") +db_path diff --git a/iso_4217_list_one.xls b/iso_4217_list_one.xls new file mode 100644 index 0000000..7d4b5de Binary files /dev/null and b/iso_4217_list_one.xls differ diff --git a/ttaxt.db b/ttaxt.db new file mode 100644 index 0000000..28ed1fb Binary files /dev/null and b/ttaxt.db differ diff --git a/wikidata_countries_currency.parquet b/wikidata_countries_currency.parquet new file mode 100644 index 0000000..bc7f2e0 Binary files /dev/null and b/wikidata_countries_currency.parquet differ