# Re-running the script due to the environment reset import requests import pandas as pd import pyvat import sqlite3 import pyarrow from SPARQLWrapper import SPARQLWrapper, JSON import os from datetime import date import logging # Set up logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # Database setup db_path = 'ttaxt.db' conn = sqlite3.connect(db_path) cursor = conn.cursor() logging.info("Database connection established") # Create necessary tables # Reverse charge values (as defined in pyvat): # 1 - Charge VAT # 2 - Reverse charge # 3 - No charge cursor.executescript(''' CREATE TABLE IF NOT EXISTS countries ( country_code TEXT PRIMARY KEY, country_name TEXT, continent TEXT, currency TEXT ); CREATE TABLE IF NOT EXISTS vat_rules ( country_code TEXT, is_registered INTEGER, vat_rate REAL DEFAULT NULL, vat_action INTEGER DEFAULT NULL, PRIMARY KEY (country_code, is_registered) ); ''') logging.info("Tables created successfully") # Function to fetch country and currency data from Wikidata using SPARQL def fetch_wikidata_countries_and_currency(): parquet_file = 'wikidata_countries_currency.parquet' if os.path.exists(parquet_file): # Load data from parquet file if it exists return pd.read_parquet(parquet_file) else: sparql = SPARQLWrapper("https://query.wikidata.org/sparql") sparql.setQuery(""" SELECT ?countryLabel ?iso_alpha_2 ?currency_code ?continentLabel WHERE { ?country wdt:P31 wd:Q6256; # Select countries wdt:P297 ?iso_alpha_2; # Get ISO 3166-1 alpha-2 codes wdt:P38 ?currency; # Get currency wdt:P30 ?continent. # Get continent ?currency wdt:P498 ?currency_code. # Get ISO 4217 currency code SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } """) sparql.setReturnFormat(JSON) results = sparql.query().convert() # Process the data data = [] for result in results["results"]["bindings"]: country_name = result["countryLabel"]["value"] iso_alpha_2 = result["iso_alpha_2"]["value"] currency_code = result["currency_code"]["value"] continent = result["continentLabel"]["value"] data.append((iso_alpha_2, country_name, continent, currency_code)) logging.info(f"Appended data: {iso_alpha_2}, {country_name}, {continent}, {currency_code}") df = pd.DataFrame(data, columns=['Country Code', 'Country Name', 'Continent', 'Currency']) logging.info(f"Created DataFrame with {len(df)} rows") # Save the data to a parquet file df.to_parquet(parquet_file) logging.info(f"Saved DataFrame to parquet file: {parquet_file}") return df # Set initial seller seller = pyvat.Party('NL', is_business=True) # Assuming the seller is a business in the Netherlands # 1. Fetch country, currency, and continent data using SPARQL logging.info("Fetching country data from Wikidata") country_data_df = fetch_wikidata_countries_and_currency() logging.info(f"Fetched {len(country_data_df)} countries from Wikidata") logging.info(f"First few rows of country data:\n{country_data_df.head()}") # 2. Download and cross-check with ISO 4217 XLS iso_4217_url = "https://www.six-group.com/dam/download/financial-information/data-center/iso-currrency/lists/list-one.xls" local_file_path = "iso_4217_list_one.xls" # Download the file if it doesn't exist if not os.path.exists(local_file_path): logging.info(f"Downloading ISO 4217 XLS file from {iso_4217_url}") response = requests.get(iso_4217_url) with open(local_file_path, 'wb') as file: file.write(response.content) logging.info("ISO 4217 XLS file downloaded successfully") else: logging.info("ISO 4217 XLS file already exists") # Read the Excel file, starting from row 4 (0-based index, so 3) iso_4217_xls = pd.read_excel(local_file_path, sheet_name=0, header=3) iso_4217_valid_codes = iso_4217_xls['Alphabetic Code'].dropna().tolist() logging.info(f"Found {len(iso_4217_valid_codes)} valid currency codes in ISO 4217 XLS") # Filter out defunct currency codes valid_countries_df = country_data_df[country_data_df['Currency'].isin(iso_4217_valid_codes)] logging.info(f"Filtered to {len(valid_countries_df)} countries with valid currencies") logging.info(f"First few rows of valid countries:\n{valid_countries_df.head()}") # Populate the countries table with filtered data logging.info("Populating countries table") for index, row in valid_countries_df.iterrows(): cursor.execute(''' INSERT OR REPLACE INTO countries (country_code, country_name, continent, currency) VALUES (?, ?, ?, ?) ''', (row['Country Code'], row['Country Name'], row['Continent'], row['Currency'])) logging.info(f"Inserted country: {row['Country Name']} ({row['Country Code']}) into the database") conn.commit() logging.info("Countries table populated successfully") # 3. Populate VAT rules using pyvat logging.info("Populating VAT rules") current_date = date.today() item_type = pyvat.ItemType.generic_electronic_service business_types = [True, False] total_countries = len(valid_countries_df) for business_type in business_types: logging.info(f"Processing VAT rules for {'business' if business_type else 'non-business'} customers") for index, row in valid_countries_df.iterrows(): country_code = row['Country Code'] country_name = row['Country Name'] buyer = pyvat.Party(country_code, is_business=business_type) vat_charge = pyvat.get_sale_vat_charge(current_date, item_type, buyer, seller) vat_rate = float(vat_charge.rate) if vat_charge.rate is not None else None is_registered = 1 if business_type else 0 # Convert boolean to integer vat_action = vat_charge.action.value if vat_charge.action is not None else None cursor.execute(''' INSERT OR REPLACE INTO vat_rules (country_code, is_registered, vat_rate, vat_action) VALUES (?, ?, ?, ?) ''', (country_code, is_registered, vat_rate, vat_action)) logging.info(f"Processed VAT rules for {country_name} ({country_code}): VAT rate = {vat_rate}, is_registered = {is_registered}, VAT action = {vat_action}") conn.commit() logging.info(f"VAT rules for {'business' if business_type else 'non-business'} customers committed to database") # print table logging.info("Fetching all VAT rules from database") cursor.execute(''' SELECT * FROM vat_rules ''') vat_rules = cursor.fetchall() logging.info(f"Total VAT rules: {len(vat_rules)}") logging.info(f"First few VAT rules:\n{vat_rules[:5]}") # Commit changes and close the database conn.commit() conn.close() logging.info("Database connection closed") # Provide the path to the created SQLite database logging.info(f"SQLite database created at: {db_path}") db_path