- populate property address data
Select *
From portfolio_1.NashvilleHousing
Order by ParcelID
- Convert date-time to date
SELECT SaleDate cast(SaleDate as date) as SaleDateConverted
FROM portfolio_1.NashvilleHousing
- fill in null(propertyaddress) with that of parcel ID
SELECT NH.ParcelID, NH.PropertyAddress, NH2.ParcelID, NH2.PropertyAddress, ifnull(NH.PropertyAddress, NH2.PropertyAddress)
FROM portfolio_1.NashvilleHousing NH
join portfolio_1.NashvilleHousing NH2 On NH.parcelID = NH2.PropertyAddress and NH.UniqueID <> NH2.UniqueID
where NH.PropertyAddress is null
limit 50
Update NH
set PropertyAddress = ifnull(NH.PropertyAddress, NH2.PropertyAddress)
FROM portfolio_1.NashvilleHousing NH
join portfolio_1.NashvilleHousing NH2 On NH.parcelID = NH2.PropertyAddress and NH.UniqueID <> NH2.UniqueID
where NH.PropertyAddress is null
- Separate the Property Address with streetnames and states
Select PropertyAddress
From portfolio_1.NashvilleHousing
SELECT PropertyAddress, split(PropertyAddress, ',') [offset(0)] as Street,split(PropertyAddress, ',') [offset(1)] as City,
From portfolio_1.NashvilleHousing
-- Update NH
-- set PropertyAddressStreet= split(PropertyAddress, ',') [offset(0)] as Street
and PropertyAddressCity = split(PropertyAddress, ',') [offset(1)] as City,
- Separate the Owner Address with streetnames and states
SELECT OwnerAddress, split(OwnerAddress, ',') [offset(0)] as Street, split(OwnerAddress, ',') [offset(1)] as City, split(OwnerAddress, ',') [offset(2)] as State
From portfolio_1.NashvilleHousing
Update portfolio_1.NH
Set OwnerAddressStreet= split(OwnerAddress, ',') [offset(0)] as Street
and OwnerAddressCity = split(OwnerAddress, ',') [offset(1)] as City,
OwnerAddressState = split(OwnerAddress, ',') [offset(1)] as State,
- Change Y and N to Yes and No in Sold as Vacant field
Select SoldAsVacant, Case when SoldAsVacant= 'False' then 'No'
when SoldAsVacant = 'True' then 'Yes'
else SoldAsVacant
End
From portfolio_1.NashvilleHousing
Update portfolio_1.NH
Set SoldAsVacant = Case when SoldAsVacant= 'False' then 'No'
when SoldAsVacant = 'True' then 'Yes'
else SoldAsVacant
End
- Remove Duplicates= "parcelID, propertyAddress" 등등으로 줄세워서 테이블 생성함
with RowNumCTE As(
Select *,
row_number() over (
partition by ParcelID,
PropertyAddress,
SalePrice,
SaleDate,
LegalReference
Order by UniqueID) as RowNum
From portfolio_1.NashvilleHousing
-- order by parcelID
)
DELETE
FROM RowNumCTE
where row_num >1
with RowNumCTE As(
Select *,
row_number() over (
partition by ParcelID,
PropertyAddress,
SalePrice,
SaleDate,
LegalReference
Order by UniqueID) as RowNum
From portfolio_1.NashvilleHousing
-- order by parcelID
)
SELECT *
FROM RowNumCTE
- Delete Unused Columns
Select *
From portfolio_1.NashvilleHousing
Alter table portfolio_1.NashvilleHousing
drop column OwnerAddress
'Programming > Google Data Analytics Certificate' 카테고리의 다른 글
SQL - Alex the Analyst portfolio 1 in Big Query (0) | 2023.04.05 |
---|---|
R (0) | 2023.04.05 |
[GA] 구글 애널리틱스 프로그램 - R 기초 (0) | 2023.04.04 |
[GA] 구글 애널리틱스 프로그램 - R 기초 (0) | 2023.04.04 |
[GA] 구글 애널리틱스 프로그램 - R 기초 (0) | 2023.04.03 |